Here we want to determine the precision and recall of our methods using the synthetic datasets.
# install.packages("MASS")
# install.packages("ggplot2")
# install.packages("dplyr")
# install.packages("tidyr")
# install.packages("sf") # New dependency for geometric operations (overlap calculation)
library(MASS) # For mvrnorm
library(ggplot2)
library(dplyr)
library(tidyr)
library(sf) # For spatial data operations to calculate ellipse overlap
# --- 1. Define Dataset Parameters ---
num_genes <- 300 # N = 300 genes
samples_per_condition <- 50 # M = 50 samples per condition
num_conditions <- 2 # Conditions Healthy and Diseased
num_modules <- 10 # Number of predefined gene modules
# Set a seed for reproducibility
set.seed(42)
# --- 2. Assign Known Module Membership to Genes ---
# Each gene is assigned to one of the 'num_modules'.
# Genes are divided equally among modules.
genes_per_module <- num_genes %/% num_modules # Integer division
gene_ids <- paste0("Gene_", 1:num_genes)
# Create module assignments (e.g., Gene_001 to Gene_030 are Module_1, etc.)
module_nums <- ((0:(num_genes-1)) %/% genes_per_module) + 1
gene_module_membership <- data.frame(
Gene = gene_ids,
Module = paste0("Module_", module_nums)
)
cat("--- Gene Module Membership (first 10 genes) ---\n")
--- Gene Module Membership (first 10 genes) ---
print(head(gene_module_membership, 10))
cat(paste0(rep("-", 50), collapse = ""), "\n\n")
--------------------------------------------------
# --- 3. Define Parameters for Mean Expression Profiles and Overlap on LOG SCALE ---
# These parameters now refer to the log-transformed gene expression values.
log_baseline_mean <- log(10.0) # Base mean expression level on log scale (e.g., log(10) = 2.3)
# Magnitude of the module-specific effect on the LOG scale.
# A smaller value here means less separation on the log scale, leading to more overlap
# in the PCA space of the final, exponentiated data.
# log_module_effect_magnitude <- 0.9 # This value is high, expecting low overlap.
log_module_effect_magnitude <- 0.1 # High overlap
# New: Parameter for nonlinear (cubic) correlation strength
# This controls the magnitude of the shared cubic effect within modules.
# A small value is recommended as cubic terms grow rapidly.
nonlinear_correlation_strength <- 0.1 # Adjust this value to control the cubic effect's magnitude
# --- 4. Construct Covariance Matrix for Within-Module Co-expression on LOG SCALE ---
# This matrix will define the linear correlation structure between genes on the log scale.
# Genes within the same module will be linearly correlated; genes in different modules will not.
# The cubic term will add an additional, non-linear correlation on top of this.
# Controls the base variance of each gene on the log scale
log_gene_variance <- 0.3 # Reduced to avoid excessively wide tails after exponentiation
# Controls the linear correlation strength within a module (0 to 1) on the log scale
within_module_correlation <- 0.5
# Initialize a diagonal covariance matrix (genes are initially independent)
covariance_matrix <- diag(num_genes) * log_gene_variance
colnames(covariance_matrix) <- gene_ids
rownames(covariance_matrix) <- gene_ids
# Populate the covariance matrix for within-module linear co-expression
for (i in 1:num_genes) {
for (j in i:num_genes) { # Iterate through upper triangle (matrix is symmetric)
if (i == j) {
# Diagonal elements are variances (already set)
next
}
gene1_module <- gene_module_membership$Module[gene_module_membership$Gene == gene_ids[i]]
gene2_module <- gene_module_membership$Module[gene_module_membership$Gene == gene_ids[j]]
if (gene1_module == gene2_module) {
# If genes are in the same module, set their covariance based on linear correlation
# Cov(X,Y) = Corr(X,Y) * SD(X) * SD(Y)
# Assuming SD(X) = SD(Y) = sqrt(log_gene_variance)
covariance_value <- within_module_correlation * log_gene_variance
covariance_matrix[i, j] <- covariance_value
covariance_matrix[j, i] <- covariance_value # Symmetric
}
}
}
cat("--- Sample of Covariance Matrix (first 5x5 block, on log scale) ---\n")
--- Sample of Covariance Matrix (first 5x5 block, on log scale) ---
print(round(covariance_matrix[1:5, 1:5], 2))
Gene_1 Gene_2 Gene_3 Gene_4 Gene_5
Gene_1 0.30 0.15 0.15 0.15 0.15
Gene_2 0.15 0.30 0.15 0.15 0.15
Gene_3 0.15 0.15 0.30 0.15 0.15
Gene_4 0.15 0.15 0.15 0.30 0.15
Gene_5 0.15 0.15 0.15 0.15 0.30
cat(paste0(rep("-", 50), collapse = ""), "\n\n")
--------------------------------------------------
# --- 5. Define Mean Expression Profiles for Healthy and Diseased (Module-aware, on LOG SCALE) ---
mean_profile_Healthy_log <- rep(log_baseline_mean, num_genes)
mean_profile_Diseased_log <- rep(log_baseline_mean, num_genes)
for (i in 1:num_genes) {
gene_id <- gene_ids[i]
module_id_str <- gene_module_membership$Module[gene_module_membership$Gene == gene_id]
module_num <- as.numeric(strsplit(module_id_str, "_")[[1]][2])
# Differential expression strategy:
# Modules 1 to (num_modules/2) will have higher expression in Condition Healthy
# Modules (num_modules/2 + 1) to num_modules will have higher expression in Condition Diseased
if (module_num <= num_modules / 2) {
mean_profile_Healthy_log[i] <- mean_profile_Healthy_log[i] + log_module_effect_magnitude
} else {
mean_profile_Diseased_log[i] <- mean_profile_Diseased_log[i] + log_module_effect_magnitude
}
}
cat("--- Sample of Mean Expression Profiles (first 10 genes, on log scale) ---\n")
--- Sample of Mean Expression Profiles (first 10 genes, on log scale) ---
print(data.frame(Gene = gene_ids[1:10], Mean_Healthy_log = mean_profile_Healthy_log[1:10], Mean_Diseased_log = mean_profile_Diseased_log[1:10]))
cat(paste0(rep("-", 50), collapse = ""), "\n\n")
--------------------------------------------------
# --- 6. Generate Synthetic Gene Expression Data (from Log-Normal Distribution with Cubic Term) ---
# Generate module-specific latent variables for cubic effect
# These will be shared by all genes within a module for a given sample, and are independent of mvrnorm noise
module_latent_vars_healthy <- matrix(rnorm(num_modules * samples_per_condition),
nrow = num_modules,
ncol = samples_per_condition)
module_latent_vars_diseased <- matrix(rnorm(num_modules * samples_per_condition),
nrow = num_modules,
ncol = samples_per_condition)
# Generate base data for Healthy samples (linearly correlated on log-scale)
expression_Healthy_log_base <- t(MASS::mvrnorm(n = samples_per_condition,
mu = mean_profile_Healthy_log,
Sigma = covariance_matrix))
# Add the cubic term to introduce non-linear correlation within modules
expression_Healthy_log <- expression_Healthy_log_base
for (s in 1:samples_per_condition) {
for (g_idx in 1:num_genes) {
module_num <- as.numeric(strsplit(gene_module_membership$Module[g_idx], "_")[[1]][2])
# Add the scaled cubic term
cubic_term <- nonlinear_correlation_strength * (module_latent_vars_healthy[module_num, s])^3
expression_Healthy_log[g_idx, s] <- expression_Healthy_log[g_idx, s] + cubic_term
}
}
expression_Healthy <- exp(expression_Healthy_log) # Convert from log to linear scale
colnames(expression_Healthy) <- paste0("Healthy_Sample_", 1:samples_per_condition)
rownames(expression_Healthy) <- paste0("Gene_", 1:num_genes)
# Generate base data for Diseased samples (linearly correlated on log-scale)
expression_Diseased_log_base <- t(MASS::mvrnorm(n = samples_per_condition,
mu = mean_profile_Diseased_log,
Sigma = covariance_matrix))
# Add the cubic term to introduce non-linear correlation within modules
expression_Diseased_log <- expression_Diseased_log_base
for (s in 1:samples_per_condition) {
for (g_idx in 1:num_genes) {
module_num <- as.numeric(strsplit(gene_module_membership$Module[g_idx], "_")[[1]][2])
# Add the scaled cubic term
cubic_term <- nonlinear_correlation_strength * (module_latent_vars_diseased[module_num, s])^3
expression_Diseased_log[g_idx, s] <- expression_Diseased_log[g_idx, s] + cubic_term
}
}
expression_Diseased <- exp(expression_Diseased_log) # Convert from log to linear scale
colnames(expression_Diseased) <- paste0("Diseased_Sample_", 1:samples_per_condition)
rownames(expression_Diseased) <- paste0("Gene_", 1:num_genes)
# Ensure all expression values are non-negative (clip at a small value to avoid exact zero)
expression_Healthy[expression_Healthy < 0.01] <- 0.01
expression_Diseased[expression_Diseased < 0.01] <- 0.01
# Combine into a single gene expression matrix
synthetic_expression_data <- cbind(expression_Healthy, expression_Diseased)
# --- 7. Create Sample Metadata (Phenotype/Condition Labels) ---
sample_metadata <- data.frame(
Sample_ID = colnames(synthetic_expression_data),
Condition = c(rep("Healthy", samples_per_condition), rep("Diseased", samples_per_condition))
)
rownames(sample_metadata) <- sample_metadata$Sample_ID
print("--- Synthetic Dataset Created ---")
[1] "--- Synthetic Dataset Created ---"
print(paste("Dimensions of expression data (genes x samples):",
dim(synthetic_expression_data)[1], "x", dim(synthetic_expression_data)[2]))
[1] "Dimensions of expression data (genes x samples): 300 x 100"
print("First 5 rows and 5 columns of expression data (linear scale):")
[1] "First 5 rows and 5 columns of expression data (linear scale):"
print(head(synthetic_expression_data[, 1:5]))
Healthy_Sample_1 Healthy_Sample_2 Healthy_Sample_3 Healthy_Sample_4 Healthy_Sample_5
Gene_1 39.98632 14.615295 7.536453 13.19507 9.511583
Gene_2 22.55743 10.669340 13.297870 10.86345 4.735773
Gene_3 12.54379 18.601410 11.303937 14.17025 5.552115
Gene_4 14.69860 14.041051 12.808648 27.63371 9.389464
Gene_5 17.96696 8.539735 5.759028 7.69522 7.278582
Gene_6 14.61363 16.171502 6.560317 14.67489 7.858207
print("First 5 rows of sample metadata:")
[1] "First 5 rows of sample metadata:"
print(head(sample_metadata))
cat(paste0(rep("-", 50), collapse = ""), "\n\n")
--------------------------------------------------
# --- Save synthetic_expression_data, sample_metadata, and gene_module_membership ---
# Construct filenames to include module_effect_magnitude
expression_filename <- paste0("synthetic_expression_data_effect_", log_module_effect_magnitude, ".csv")
metadata_filename <- paste0("sample_metadata_effect_", log_module_effect_magnitude, ".csv")
module_membership_filename <- paste0("gene_module_membership_effect_", log_module_effect_magnitude, ".csv")
write.csv(synthetic_expression_data, expression_filename, row.names = TRUE)
cat(sprintf("Saved %s\n", expression_filename))
Saved synthetic_expression_data_effect_0.1.csv
write.csv(sample_metadata, metadata_filename, row.names = FALSE)
cat(sprintf("Saved %s\n", metadata_filename))
Saved sample_metadata_effect_0.1.csv
write.csv(gene_module_membership, module_membership_filename, row.names = FALSE)
cat(sprintf("Saved %s\n\n", module_membership_filename))
Saved gene_module_membership_effect_0.1.csv
# --- 8. Verify Overlap with PCA Plot ---
# Transpose the expression data for PCA (PCA expects samples as rows, genes as columns)
pca_input_data <- t(synthetic_expression_data)
# Perform PCA
pca_results <- prcomp(pca_input_data, center = TRUE, scale. = TRUE) # Scale is usually good for gene expression PCA
# Extract PCA scores (coordinates of samples in PCA space)
pca_scores <- as.data.frame(pca_results$x)
# Add condition labels to the PCA scores
pca_scores$Condition <- sample_metadata[rownames(pca_scores), "Condition"]
# Calculate explained variance for PC1 and PC2
pca_summary <- summary(pca_results)
pc1_var_explained <- round(pca_summary$importance[2, 1] * 100, 2)
pc2_var_explained <- round(pca_summary$importance[2, 2] * 100, 2)
# --- Function to generate closed ellipse points for overlap calculation ---
ellipse_points <- function(df, level = 0.95, n = 100) {
# For confidence ellipses, the scaling factor from stats::ellipse is chisq(level)/2
# For normal distribution, it's sqrt(qchisq(level, df = 2))
# The stat_ellipse uses a t-distribution by default, or normal if type="norm"
# Here we use the standard deviation for scaling to match a specific confidence region
# Calculate 95% confidence region scaling factor from a chi-squared distribution with 2 degrees of freedom
# (since we are in 2D for PC1 and PC2)
scale_factor <- sqrt(qchisq(level, df = 2))
cov_mat <- cov(df[, c("PC1", "PC2")])
mean_vals <- colMeans(df[, c("PC1", "PC2")])
angles <- seq(0, 2 * pi, length.out = n)
# Scale by the square root of the eigenvalues and rotate by eigenvectors (cholesky decomposition)
ellipse_coords <- t(chol(cov_mat)) %*% rbind(cos(angles), sin(angles))
coords <- data.frame(
PC1 = mean_vals[1] + scale_factor * ellipse_coords[1,],
PC2 = mean_vals[2] + scale_factor * ellipse_coords[2,]
)
# Close the polygon by adding the first point at the end
coords <- rbind(coords, coords[1,])
return(coords)
}
# --- Compute Overlap of Ellipses ---
# Use the pca_scores as the pca_df mentioned in the user's snippet
pca_df_for_overlap <- pca_scores
# Generate closed ellipse points for each condition
ellipse_healthy <- ellipse_points(pca_df_for_overlap %>% filter(Condition == "Healthy"))
ellipse_diseased <- ellipse_points(pca_df_for_overlap %>% filter(Condition == "Diseased"))
# Convert to sf polygons
# Need to set a CRS, 4326 is WGS84, common for geographic coords, but can be arbitrary for abstract space
poly_healthy <- st_polygon(list(as.matrix(ellipse_healthy)))
poly_diseased <- st_polygon(list(as.matrix(ellipse_diseased)))
# Set CRS to ensure st_intersection works (can be any valid CRS for arbitrary coordinates)
poly_healthy_sfc <- st_sfc(poly_healthy, crs = 4326)
poly_diseased_sfc <- st_sfc(poly_diseased, crs = 4326)
# Compute overlap area
intersection_geometry <- st_intersection(poly_healthy_sfc, poly_diseased_sfc)
# Convert sf::units to numeric before operations
intersect_area <- as.numeric(st_area(intersection_geometry))
# Calculate total area for normalization
area_healthy <- as.numeric(st_area(poly_healthy_sfc))
area_diseased <- as.numeric(st_area(poly_diseased_sfc))
# "Total area" for overlap percentage usually means the area of the smaller ellipse
# or the sum of both areas. User specified min(area_healthy, area_diseased).
total_area_for_overlap_calc <- min(area_healthy, area_diseased)
overlap_value <- if (isTRUE(!is.na(intersect_area) && total_area_for_overlap_calc > 0)) {(intersect_area / total_area_for_overlap_calc) * 100
} else { 0 }
# Ensure overlap_value is not NaN if there's no intersection
if (is.nan(overlap_value)) {
overlap_value <- 0
}
# --- Plot PCA1 vs PCA2 with customized theme and overlap annotation ---
cat("\n--- Generating PCA Plot ---\n")
--- Generating PCA Plot ---
pca_plot <- ggplot(pca_scores, aes(x = PC1, y = PC2, color = Condition, fill = Condition)) +
geom_point(size = 3, alpha = 0.8) + # Scatter points # Create and fill ellipses
stat_ellipse(geom = "polygon", alpha = 0.2, linetype = "solid", level = 0.95) +
labs(
title = paste0("Synthetic Gene Expression Data (Effect: ", log_module_effect_magnitude, ")"),
x = paste0("PC1 (", pc1_var_explained, "%)"),
y = paste0("PC2 (", pc2_var_explained, "%)")
) +
theme_minimal() +
theme(
panel.grid = element_blank(),# Remove gridlines
axis.line = element_line(color = "black"), # Add axis lines
axis.title = element_text(size = 14, face = "bold"), # Increase font size of axis labels
axis.text = element_text(size = 12), # Increase tick label size
plot.title = element_text(hjust = 0.5), # Center title
legend.position = "bottom"
)
# Get plot limits after defining the plot object 'pca_plot'
# This is crucial for correctly positioning the annotation
plot_limits <- ggplot_build(pca_plot)$layout$panel_params[[1]]
xmin <- plot_limits$x.range[1]
ymin <- plot_limits$y.range[1]
# Add annotation at lower-left
final_pca_plot <- pca_plot +
annotate("text", x = xmin + 0.05 * (plot_limits$x.range[2] - xmin),
y = ymin + 0.05 * (plot_limits$y.range[2] - ymin),
label = sprintf("Overlap = %.2f%%", overlap_value),
size = 5, hjust = 0, vjust = 0, color = "black")
print(final_pca_plot)
# --- Save PCA Plot ---
pca_plot_filename <- paste0("pca_plot_effect_", log_module_effect_magnitude, ".png")
ggsave(pca_plot_filename, plot = final_pca_plot, width = 8, height = 6, dpi = 300, bg = "white")
cat(sprintf("Saved %s\n\n", pca_plot_filename))
Saved pca_plot_effect_0.1.png
# To see how much variance is explained by the first few PCs:
print("Variance explained by PCs:")
[1] "Variance explained by PCs:"
print(summary(pca_results))
Importance of components:
PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9 PC10 PC11 PC12
Standard deviation 5.9975 5.6944 5.5625 5.37626 5.13735 4.68516 4.45587 4.10204 3.83832 3.5583 1.61729 1.54558
Proportion of Variance 0.1199 0.1081 0.1031 0.09635 0.08797 0.07317 0.06618 0.05609 0.04911 0.0422 0.00872 0.00796
Cumulative Proportion 0.1199 0.2280 0.3311 0.42748 0.51545 0.58862 0.65480 0.71089 0.76000 0.8022 0.81092 0.81888
PC13 PC14 PC15 PC16 PC17 PC18 PC19 PC20 PC21 PC22 PC23 PC24
Standard deviation 1.41677 1.39036 1.37102 1.31318 1.31255 1.2614 1.24605 1.20463 1.15412 1.13821 1.12768 1.11778
Proportion of Variance 0.00669 0.00644 0.00627 0.00575 0.00574 0.0053 0.00518 0.00484 0.00444 0.00432 0.00424 0.00416
Cumulative Proportion 0.82558 0.83202 0.83828 0.84403 0.84978 0.8551 0.86025 0.86509 0.86953 0.87385 0.87809 0.88225
PC25 PC26 PC27 PC28 PC29 PC30 PC31 PC32 PC33 PC34 PC35 PC36
Standard deviation 1.09930 1.07351 1.05472 1.03486 1.01498 1.0101 0.99367 0.97079 0.96641 0.94659 0.93046 0.92908
Proportion of Variance 0.00403 0.00384 0.00371 0.00357 0.00343 0.0034 0.00329 0.00314 0.00311 0.00299 0.00289 0.00288
Cumulative Proportion 0.88628 0.89012 0.89383 0.89740 0.90084 0.9042 0.90753 0.91067 0.91378 0.91677 0.91965 0.92253
PC37 PC38 PC39 PC40 PC41 PC42 PC43 PC44 PC45 PC46 PC47
Standard deviation 0.91057 0.90483 0.89520 0.88891 0.87789 0.86358 0.83940 0.82847 0.81031 0.79885 0.79708
Proportion of Variance 0.00276 0.00273 0.00267 0.00263 0.00257 0.00249 0.00235 0.00229 0.00219 0.00213 0.00212
Cumulative Proportion 0.92530 0.92802 0.93070 0.93333 0.93590 0.93838 0.94073 0.94302 0.94521 0.94734 0.94946
PC48 PC49 PC50 PC51 PC52 PC53 PC54 PC55 PC56 PC57 PC58 PC59
Standard deviation 0.78352 0.77715 0.75752 0.74722 0.73721 0.72275 0.71252 0.70847 0.69643 0.67728 0.6699 0.65457
Proportion of Variance 0.00205 0.00201 0.00191 0.00186 0.00181 0.00174 0.00169 0.00167 0.00162 0.00153 0.0015 0.00143
Cumulative Proportion 0.95150 0.95351 0.95543 0.95729 0.95910 0.96084 0.96253 0.96421 0.96582 0.96735 0.9688 0.97028
PC60 PC61 PC62 PC63 PC64 PC65 PC66 PC67 PC68 PC69 PC70 PC71
Standard deviation 0.64663 0.63986 0.6256 0.61647 0.61090 0.60308 0.58699 0.58199 0.56844 0.55709 0.5478 0.53352
Proportion of Variance 0.00139 0.00136 0.0013 0.00127 0.00124 0.00121 0.00115 0.00113 0.00108 0.00103 0.0010 0.00095
Cumulative Proportion 0.97167 0.97303 0.9743 0.97561 0.97685 0.97806 0.97921 0.98034 0.98142 0.98245 0.9835 0.98440
PC72 PC73 PC74 PC75 PC76 PC77 PC78 PC79 PC80 PC81 PC82
Standard deviation 0.52746 0.52144 0.50392 0.50171 0.49259 0.48332 0.47285 0.46576 0.46154 0.45619 0.43384
Proportion of Variance 0.00093 0.00091 0.00085 0.00084 0.00081 0.00078 0.00075 0.00072 0.00071 0.00069 0.00063
Cumulative Proportion 0.98533 0.98623 0.98708 0.98792 0.98873 0.98951 0.99025 0.99098 0.99169 0.99238 0.99301
PC83 PC84 PC85 PC86 PC87 PC88 PC89 PC90 PC91 PC92 PC93 PC94
Standard deviation 0.43317 0.4242 0.41429 0.40223 0.39071 0.37418 0.36636 0.35735 0.34233 0.32954 0.32829 0.31500
Proportion of Variance 0.00063 0.0006 0.00057 0.00054 0.00051 0.00047 0.00045 0.00043 0.00039 0.00036 0.00036 0.00033
Cumulative Proportion 0.99363 0.9942 0.99480 0.99534 0.99585 0.99632 0.99677 0.99719 0.99758 0.99795 0.99830 0.99864
PC95 PC96 PC97 PC98 PC99 PC100
Standard deviation 0.30976 0.29349 0.28633 0.27680 0.26230 2.663e-15
Proportion of Variance 0.00032 0.00029 0.00027 0.00026 0.00023 0.000e+00
Cumulative Proportion 0.99895 0.99924 0.99952 0.99977 1.00000 1.000e+00
cat(paste0(rep("-", 50), collapse = ""), "\n\n")
--------------------------------------------------
# --- 9. Compute Overlap Percentage (Estimated using Logistic Regression) ---
# This is a separate estimation of overlap from the geometric one.
cat("\n--- Computing Overlap Percentage (Estimated via Logistic Regression) ---\n")
--- Computing Overlap Percentage (Estimated via Logistic Regression) ---
# Ensure 'Condition' is a factor for classification
pca_scores$Condition <- factor(pca_scores$Condition)
# Train a logistic regression model
logistic_model <- glm(Condition ~ PC1 + PC2, data = pca_scores, family = "binomial")
# Predict probabilities
probabilities <- predict(logistic_model, type = "response")
# Convert probabilities to class predictions
# The levels should match 'Healthy' and 'Diseased'
predicted_classes_lr <- factor(ifelse(probabilities > 0.5, "Diseased", "Healthy"), levels = levels(pca_scores$Condition))
# Calculate misclassification rate
misclassification_rate_lr <- mean(predicted_classes_lr != pca_scores$Condition)
overlap_percentage_lr <- round(misclassification_rate_lr * 100, 2)
cat(sprintf("Estimated Overlap Percentage (Logistic Regression misclassification): %.2f%%\n", overlap_percentage_lr))
Estimated Overlap Percentage (Logistic Regression misclassification): 56.00%
cat(paste0(rep("=", 50), collapse = ""), "\n")
==================================================
NA
NA
NA
#renv::install('tibble')
library(tibble)
library(Biobase)
library(annotate)
phenData = AnnotatedDataFrame(
read.csv(paste0("sample_metadata_effect_",variability,".csv"), header=T, row.names = 1)
)
eSet = new("ExpressionSet",
exprs= as.matrix(synthetic_expression_data),
phenoData = phenData
)
eSet
ExpressionSet (storageMode: lockedEnvironment)
assayData: 300 features, 100 samples
element names: exprs
protocolData: none
phenoData
sampleNames: Healthy_Sample_1 Healthy_Sample_2 ... Diseased_Sample_50 (100 total)
varLabels: Condition
varMetadata: labelDescription
featureData: none
experimentData: use 'experimentData(object)'
Annotation:
summary(exprs(eSet))[,1:5]
Healthy_Sample_1 Healthy_Sample_2 Healthy_Sample_3 Healthy_Sample_4 Healthy_Sample_5
Min. : 1.997 Min. : 0.5559 Min. : 1.808 Min. : 1.150 Min. : 1.583
1st Qu.: 11.168 1st Qu.: 6.9661 1st Qu.: 6.641 1st Qu.: 6.635 1st Qu.: 8.984
Median : 20.813 Median : 18.0270 Median :11.539 Median :14.324 Median :16.189
Mean : 25.438 Mean : 28.3225 Mean :15.859 Mean :18.243 Mean :17.348
3rd Qu.: 36.618 3rd Qu.: 31.5246 3rd Qu.:21.709 3rd Qu.:26.133 3rd Qu.:23.273
Max. :121.242 Max. :233.2341 Max. :83.059 Max. :65.514 Max. :96.859
#renv::install('limma')
library(limma)
normData = normalizeBetweenArrays(exprs(eSet))
eSet.norm = new("ExpressionSet",
exprs= as.matrix(normData),
phenoData = phenData)
boxplot(exprs(eSet.norm)[,1:10])
summary(exprs(eSet.norm))[,1:5]
Healthy_Sample_1 Healthy_Sample_2 Healthy_Sample_3 Healthy_Sample_4 Healthy_Sample_5
Min. : 2.559 Min. : 2.559 Min. : 2.559 Min. : 2.559 Min. : 2.559
1st Qu.: 9.478 1st Qu.: 9.478 1st Qu.: 9.478 1st Qu.: 9.478 1st Qu.: 9.478
Median : 16.138 Median : 16.138 Median : 16.138 Median : 16.138 Median : 16.138
Mean : 24.848 Mean : 24.848 Mean : 24.848 Mean : 24.848 Mean : 24.848
3rd Qu.: 27.477 3rd Qu.: 27.477 3rd Qu.: 27.477 3rd Qu.: 27.477 3rd Qu.: 27.477
Max. :194.789 Max. :194.789 Max. :194.789 Max. :194.789 Max. :194.789
library(WGCNA)
dat.matrix = exprs(eSet.norm)
dat.expr = as.data.frame(t(dat.matrix))
gsg = goodSamplesGenes(dat.expr, verbose = 3)
Flagging genes and samples with too many missing values...
..step 1
if(sum(!gsg$goodSamples)>0) {
summary(exprs(eSet.norm.combat))[,!gsg$goodSamples]
}
sprintf("There are %d bad samples in the dataset.", sum(!gsg$goodSamples))
[1] "There are 0 bad samples in the dataset."
if(sum(!gsg$goodGenes)) {
bad.genes.list = colnames(dat.expr)[!gsg$goodGenes]
}
sprintf('There are %d bad genes in the dataset.', sum(!gsg$goodGenes))
[1] "There are 0 bad genes in the dataset."
sampleTree = hclust(dist(dat.expr), method = "average")
plot(sampleTree,
main = "Tree cluster for outlier detection", sub="", xlab="",
cex.lab = 1.5, cex.axis = 1.5, cex.main = 2,
)
h=25
clust = cutreeStatic(sampleTree, cutHeight = h, minSize = 10)
table(clust)
clust
0
100
library(WGCNA)
options(stringsAsFactors = FALSE)
disableWGCNAThreads()
ggset = as.data.frame(t(exprs(eSet.norm)))
head(ggset)
softPower = 1
adj = adjacency(ggset, power = softPower)
dim(adj)
[1] 300 300
TOM = TOMsimilarity(adj);
..connectivity..
..matrix multiplication (system BLAS)..
..normalization..
..done.
dissTOM = 1-TOM
dim(TOM)
[1] 300 300
# Call the hierarchical clustering function
geneTree = hclust(as.dist(dissTOM), method = "average");
# Plot the resulting clustering tree (dendrogram)
sizeGrWindow(12,9)
plot(geneTree, xlab="", sub="",
main = "Gene clustering of Synthetic Dataset",
labels = FALSE, hang = 0.04)
# We like large modules, so we set the minimum module size relatively high:
minModuleSize = 10;
# Module identification using dynamic tree cut:
dynamicMods = cutreeDynamic(dendro = geneTree, distM = dissTOM,
deepSplit = 2, pamRespectsDendro = FALSE,
minClusterSize = minModuleSize)
..cutHeight not given, setting it to 0.809 ===> 99% of the (truncated) height range in dendro.
..done.
table(dynamicMods)
dynamicMods
1 2 3 4 5 6 7 8 9 10
30 30 30 30 30 30 30 30 30 30
# Convert numeric lables into colors
dynamicColors = labels2colors(dynamicMods)
table(dynamicColors)
dynamicColors
black blue brown green magenta pink purple red turquoise yellow
30 30 30 30 30 30 30 30 30 30
# Plot the dendrogram and colors underneath
sizeGrWindow(8,6)
plotDendroAndColors(geneTree, dynamicColors, "Dynamic Tree Cut",
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05,
main = "Gene dendrogram of Synthetic Dataset")
# We like large modules, so we set the minimum module size relatively high:
minModuleSize = 10;
# Module identification using dynamic tree cut:
dynamicMods = cutreeDynamic(dendro = geneTree, distM = dissTOM,
deepSplit = 2, pamRespectsDendro = FALSE,
minClusterSize = minModuleSize)
..cutHeight not given, setting it to 0.809 ===> 99% of the (truncated) height range in dendro.
..done.
table(dynamicMods)
dynamicMods
1 2 3 4 5 6 7 8 9 10
30 30 30 30 30 30 30 30 30 30
# Convert numeric lables into colors
dynamicColors = labels2colors(dynamicMods)
table(dynamicColors)
dynamicColors
black blue brown green magenta pink purple red turquoise yellow
30 30 30 30 30 30 30 30 30 30
# Plot the dendrogram and colors underneath
sizeGrWindow(8,6)
plotDendroAndColors(geneTree, dynamicColors, "Dynamic Tree Cut",
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05,
main = "Gene dendrogram of Synthetic Dataset")
datTraits = pData(phenData)
datTraits$Condition <- ifelse(datTraits$Condition == "Healthy", 0, 1)
datTraits
NA
library(ggplot2)
datExpr = ggset
nGenes = ncol(datExpr)
nSamples = nrow(datExpr)
datTraits = pData(eSet.norm)
datTraits$Condition <- ifelse(datTraits$Condition == "Healthy", 0, 1)
MEs0 = moduleEigengenes(datExpr, dynamicColors)$eigengenes
MEs = orderMEs(MEs0)
moduleTraitCor = cor(MEs, datTraits, use = "p")
moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nSamples)
# Will display correlations and their p-values
png(paste0("WGCNAModules_Synthetic_", variability, ".png"),width=7,height=5,units="in",res=600)
textMatrix = paste(signif(moduleTraitCor, 2), "\n(",
signif(moduleTraitPvalue, 1), ")", sep = "");
dim(textMatrix) = dim(moduleTraitCor)
par(mar = c(6, 10, 3, 3))
par(mar = c(8, 8, 2, 1)) # Reduce margins (bottom, left, top, right)
labeledHeatmap(Matrix = moduleTraitCor,
xLabels = names(datTraits),
yLabels = names(MEs),
ySymbols = names(MEs),
colorLabels = FALSE,
colors = blueWhiteRed(50),
textMatrix = textMatrix,
setStdMargins = FALSE,
cex.text = 0.3,
zlim = c(-1,1),
main = paste("Diseased vs Healthy"))
healthy.exprs = exprs(eSet.norm)[,1:50]
write.table(healthy.exprs,file = paste0("synthetic_healthy_", log_module_effect_magnitude,".txt"), sep = "\t",
quote = FALSE, row.names = TRUE, col.names = TRUE)
diseased.exprs = exprs(eSet.norm)[,51:100]
write.table(diseased.exprs, file = paste0("synthetic_diseased_", log_module_effect_magnitude,".txt"), sep = "\t",
quote = FALSE, row.names = TRUE, col.names = TRUE)
dim(healthy.exprs)
[1] 300 50
dim(diseased.exprs)
[1] 300 50
healthy.dcor = read.csv("dcor-method/dcor_synthetic_healthy_0.1.csv", header=T)
healthy.dcor = as.matrix(healthy.dcor[,-c(1)])
dim(healthy.dcor)
[1] 300 300
TOM.healthy.dcor = TOMsimilarity(as.matrix(healthy.dcor));
..connectivity..
..matrix multiplication (system BLAS)..
..normalization..
..done.
dissTOM.healthy.dcor = 1-TOM.healthy.dcor
dim(TOM.healthy.dcor)
[1] 300 300
hist(rowSums(TOM.healthy.dcor))
write.table(dissTOM.healthy.dcor, file = "dcor-method/dissTOM_healthy_dcor9_0.1.txt", row.names = FALSE, col.names = FALSE, sep = " ")
# Call the hierarchical clustering function
geneTree = hclust(as.dist(dissTOM.healthy.dcor), method = "average");
# Plot the resulting clustering tree (dendrogram)
sizeGrWindow(12,9)
plot(geneTree, xlab="", sub="",
main = "Gene clustering of Healthy Samples based on TOM-based dissimilarity",
labels = FALSE)
# We like large modules, so we set the minimum module size relatively high:
minModuleSize = 10;
# Module identification using dynamic tree cut:
dynamicMods = cutreeDynamic(dendro = geneTree, distM = dissTOM.healthy.dcor,
deepSplit = 2, pamRespectsDendro = FALSE,
minClusterSize = minModuleSize)
..cutHeight not given, setting it to 0.745 ===> 99% of the (truncated) height range in dendro.
..done.
table(dynamicMods)
dynamicMods
1 2 3 4 5 6 7 8 9 10
32 32 31 31 30 30 30 30 28 26
# Convert numeric lables into colors
dynamicColors = labels2colors(dynamicMods)
table(dynamicColors)
dynamicColors
black blue brown green magenta pink purple red turquoise yellow
30 32 31 30 28 30 26 30 32 31
# Plot the dendrogram and colors underneath
sizeGrWindow(8,6)
plotDendroAndColors(geneTree, dynamicColors, "Dynamic Tree Cut",
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05,
main = "Gene dendrogram of HealthySamples")
healthy.ggset = read.table(paste0("dcor-method/","synthetic_healthy_0.1.txt"), header = TRUE, sep="\t")
rownames(healthy.ggset) = healthy.ggset$Genes
healthy.ggset = as.data.frame(t(healthy.ggset[,-1]))
healthy.ggset = healthy.ggset[,colnames(healthy.dcor)]
head(healthy.ggset)
# Calculate eigengenes
MEList = moduleEigengenes(as.matrix(healthy.ggset), colors = dynamicColors)
MEs = MEList$eigengenes
# Calculate dissimilarity of module eigengenes
MEDiss = 1-cor(MEs);
# Cluster module eigengenes
METree = hclust(as.dist(MEDiss), method = "average");
# Plot the result
sizeGrWindow(16, 8)
plot(METree, main = "Clustering of module eigengenes in Healthy Samples",
xlab = "", sub = "")
# Merging modules
MEDissThres = 0.2
# Plot the cut line into the dendrogram
abline(h=MEDissThres, col = "red")
# Rename to moduleColors
moduleColors = dynamicColors
# Construct numerical labels corresponding to the colors
colorOrder = c("grey", standardColors(50));
moduleLabels = match(moduleColors, colorOrder)-1;
dim(MEs)
[1] 50 10
freq.tab <- as.data.frame(table(moduleColors))
colnames(freq.tab) <- c("Modules", "Membership")
freq.tab = freq.tab[order(-freq.tab$Membership), ]
rownames(freq.tab) <- 1:nrow(freq.tab)
freq.tab$Modules = factor(freq.tab$Modules, levels = freq.tab$Modules)
ggplot(freq.tab, aes(x = Modules, y = Membership, fill = Modules)) +
geom_bar(stat = "identity", color = "grey1", size = 0.5) +
labs(title = "Module Size in Healthy Patients",
x = "Modules",
y = "Membership") +
scale_fill_manual(values=as.character(freq.tab$Modules)) + # Set bar colors
theme_minimal() +
theme(
panel.grid = element_blank(), # Remove grid lines
axis.line = element_line(size = 0.5), # Adjust line width of axes
axis.text.x = element_text(size = 10, color='black', angle = 90), # Adjust font size of axis text
axis.text.y = element_text(size = 10, color='black'),
axis.title = element_text(size = 16) # Adjust font size of axis labels
) +
theme(legend.position = "none") # Remove legend if unnecessary
# Function to extract genes belonging to each module
genelist = names(healthy.ggset)
get_module_genes <- function(genelist, moduleColors) {
unique_colors <- unique(moduleColors) # Get unique module names
module_genes <- lapply(unique_colors, function(color) {
which(moduleColors == color) # Indices of genes in this module
})
names(module_genes) <- unique_colors
# Map indices back to gene names
module_genes <- lapply(module_genes, function(indices) genelist[indices])
return(module_genes)
}
healthy_modules <- get_module_genes(colnames(healthy.dcor), moduleColors)
# Create a dataframe
df <- data.frame(
genes = colnames(healthy.dcor),
modules = moduleColors
)
# Save the dataframe to a CSV file
output_file <- paste0("dcor-method/","modules_healthy_0.1.csv")
write.csv(df, file = output_file, row.names = F)
length(df$genes)
[1] 300
unique(df$modules)
[1] "magenta" "blue" "yellow" "turquoise" "red" "purple" "brown" "black" "green"
[10] "pink"
head(df)
diseased.dcor = read.csv("dcor-method/dcor_synthetic_diseased_0.1.csv", header=T)
diseased.dcor = as.matrix(diseased.dcor[,-c(1)])
dim(diseased.dcor)
[1] 300 300
TOM.diseased.dcor = TOMsimilarity(as.matrix(diseased.dcor));
..connectivity..
..matrix multiplication (system BLAS)..
..normalization..
..done.
dissTOM.diseased.dcor = 1-TOM.diseased.dcor
dim(TOM.diseased.dcor)
[1] 300 300
hist(rowSums(TOM.diseased.dcor))
write.table(dissTOM.diseased.dcor, file = "dcor-method/dissTOM_diseased_dcor9_0.1.txt", row.names = FALSE, col.names = FALSE, sep = " ")
# Call the hierarchical clustering function
geneTree2 = hclust(as.dist(dissTOM.diseased.dcor), method = "average");
# Plot the resulting clustering tree (dendrogram)
sizeGrWindow(12,9)
plot(geneTree2, xlab="", sub="",
main = "Gene clustering of Healthy Samples based on TOM-based dissimilarity",
labels = FALSE)
# We like large modules, so we set the minimum module size relatively high:
minModuleSize = 10;
# Module identification using dynamic tree cut:
dynamicMods2 = cutreeDynamic(dendro = geneTree2, distM = dissTOM.diseased.dcor,
deepSplit = 2, pamRespectsDendro = FALSE,
minClusterSize = minModuleSize)
..cutHeight not given, setting it to 0.743 ===> 99% of the (truncated) height range in dendro.
..done.
table(dynamicMods2)
dynamicMods2
1 2 3 4 5 6 7 8 9 10
31 31 30 30 30 30 30 30 30 28
# Convert numeric lables into colors
dynamicColors2 = labels2colors(dynamicMods2)
table(dynamicColors2)
dynamicColors2
black blue brown green magenta pink purple red turquoise yellow
30 31 30 30 30 30 28 30 31 30
# Plot the dendrogram and colors underneath
sizeGrWindow(8,6)
plotDendroAndColors(geneTree2, dynamicColors2, "Dynamic Tree Cut",
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05,
main = "Gene dendrogram of Diseased Samples")
diseased.ggset = read.table(paste0("dcor-method/","synthetic_diseased_0.1.txt"), header = TRUE, sep="\t")
rownames(diseased.ggset) = diseased.ggset$Genes
diseased.ggset = as.data.frame(t(diseased.ggset[,-1]))
diseased.ggset = diseased.ggset[,colnames(diseased.dcor)]
head(diseased.ggset)
# Calculate eigengenes
MEList2 = moduleEigengenes(as.matrix(diseased.ggset), colors = dynamicColors2)
MEs2 = MEList2$eigengenes
# Calculate dissimilarity of module eigengenes
MEDiss2 = 1-cor(MEs2);
# Cluster module eigengenes
METree2 = hclust(as.dist(MEDiss2), method = "average");
# Plot the result
sizeGrWindow(16, 8)
plot(METree2, main = "Clustering of module eigengenes in Healthy Samples",
xlab = "", sub = "")
# Merging modules
MEDissThres = 0.2
# Plot the cut line into the dendrogram
abline(h=MEDissThres, col = "red")
# Rename to moduleColors
moduleColors2 = dynamicColors2
# Construct numerical labels corresponding to the colors
colorOrder2 = c("grey", standardColors(50));
moduleLabels2 = match(moduleColors2, colorOrder2)-1;
dim(MEs2)
[1] 50 10
freq.tab <- as.data.frame(table(moduleColors2))
colnames(freq.tab) <- c("Modules", "Membership")
freq.tab = freq.tab[order(-freq.tab$Membership), ]
rownames(freq.tab) <- 1:nrow(freq.tab)
freq.tab$Modules = factor(freq.tab$Modules, levels = freq.tab$Modules)
ggplot(freq.tab, aes(x = Modules, y = Membership, fill = Modules)) +
geom_bar(stat = "identity", color = "grey1", size = 0.5) +
labs(title = "Module Size in Healthy Patients",
x = "Modules",
y = "Membership") +
scale_fill_manual(values=as.character(freq.tab$Modules)) + # Set bar colors
theme_minimal() +
theme(
panel.grid = element_blank(), # Remove grid lines
axis.line = element_line(size = 0.5), # Adjust line width of axes
axis.text.x = element_text(size = 10, color='black', angle = 90), # Adjust font size of axis text
axis.text.y = element_text(size = 10, color='black'),
axis.title = element_text(size = 16) # Adjust font size of axis labels
) +
theme(legend.position = "none") # Remove legend if unnecessary
# Function to extract genes belonging to each module
genelist2 = names(diseased.ggset)
get_module_genes <- function(genelist, moduleColors) {
unique_colors <- unique(moduleColors) # Get unique module names
module_genes <- lapply(unique_colors, function(color) {
which(moduleColors == color) # Indices of genes in this module
})
names(module_genes) <- unique_colors
# Map indices back to gene names
module_genes <- lapply(module_genes, function(indices) genelist[indices])
return(module_genes)
}
diseased_modules <- get_module_genes(colnames(diseased.dcor), moduleColors2)
# Create a dataframe
df <- data.frame(
genes = colnames(diseased.dcor),
modules = moduleColors2
)
# Save the dataframe to a CSV file
output_file <- paste0("dcor-method/","modules_diseased_0.1.csv")
write.csv(df, file = output_file, row.names = F)
length(df$genes)
[1] 300
unique(df$modules)
[1] "purple" "blue" "turquoise" "black" "brown" "red" "magenta" "green" "pink"
[10] "yellow"
head(df)
healthy.dcor = read.csv("dcor-method/dcor_synthetic_healthy_0.9.csv", header=T)
healthy.dcor = as.matrix(healthy.dcor[,-c(1)])
dim(healthy.dcor)
[1] 300 300
hist(rowSums(as.matrix(healthy.dcor)))
adj = adjacency.fromSimilarity(as.matrix(healthy.dcor), power=1)
hist(rowSums(adj))
TOM.healthy.dcor = TOMsimilarity(adj);
..connectivity..
..matrix multiplication (system BLAS)..
..normalization..
..done.
dissTOM.healthy.dcor = 1-TOM.healthy.dcor
dim(TOM.healthy.dcor)
[1] 300 300
hist(rowSums(TOM.healthy.dcor))
write.table(dissTOM.healthy.dcor, file = "dcor-method/dissTOM_healthy_dcor9_0.9.txt", row.names = FALSE, col.names = FALSE, sep = " ")
# Call the hierarchical clustering function
geneTree = hclust(as.dist(dissTOM.healthy.dcor), method = "average");
# Plot the resulting clustering tree (dendrogram)
sizeGrWindow(12,9)
plot(geneTree, xlab="", sub="",
main = "Gene clustering of Healthy Samples based on TOM-based dissimilarity",
labels = FALSE)
# We like large modules, so we set the minimum module size relatively high:
minModuleSize = 10;
# Module identification using dynamic tree cut:
dynamicMods = cutreeDynamic(dendro = geneTree, distM = dissTOM.healthy.dcor,
deepSplit = 2, pamRespectsDendro = FALSE,
minClusterSize = minModuleSize)
..cutHeight not given, setting it to 0.742 ===> 99% of the (truncated) height range in dendro.
..done.
table(dynamicMods)
dynamicMods
1 2 3 4 5 6 7 8
76 44 35 33 30 29 29 24
# Convert numeric lables into colors
dynamicColors = labels2colors(dynamicMods)
table(dynamicColors)
dynamicColors
black blue brown green pink red turquoise yellow
29 44 35 30 24 29 76 33
# Plot the dendrogram and colors underneath
sizeGrWindow(8,6)
plotDendroAndColors(geneTree, dynamicColors, "Dynamic Tree Cut",
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05,
main = "Gene dendrogram of HealthySamples")
healthy.ggset = read.table(paste0("dcor-method/","synthetic_healthy_0.9.txt"), header = TRUE, sep="\t")
rownames(healthy.ggset) = healthy.ggset$Genes
healthy.ggset = as.data.frame(t(healthy.ggset[,-1]))
healthy.ggset = healthy.ggset[,colnames(healthy.dcor)]
head(healthy.ggset)
# Calculate eigengenes
MEList = moduleEigengenes(as.matrix(healthy.ggset), colors = dynamicColors)
MEs = MEList$eigengenes
# Calculate dissimilarity of module eigengenes
MEDiss = 1-cor(MEs);
# Cluster module eigengenes
METree = hclust(as.dist(MEDiss), method = "average");
# Plot the result
sizeGrWindow(16, 8)
plot(METree, main = "Clustering of module eigengenes in Healthy Samples",
xlab = "", sub = "")
# Merging modules
MEDissThres = 0.2
# Plot the cut line into the dendrogram
abline(h=MEDissThres, col = "red")
# Rename to moduleColors
moduleColors = dynamicColors
# Construct numerical labels corresponding to the colors
colorOrder = c("grey", standardColors(50));
moduleLabels = match(moduleColors, colorOrder)-1;
dim(MEs)
[1] 50 8
freq.tab <- as.data.frame(table(moduleColors))
colnames(freq.tab) <- c("Modules", "Membership")
freq.tab = freq.tab[order(-freq.tab$Membership), ]
rownames(freq.tab) <- 1:nrow(freq.tab)
freq.tab$Modules = factor(freq.tab$Modules, levels = freq.tab$Modules)
ggplot(freq.tab, aes(x = Modules, y = Membership, fill = Modules)) +
geom_bar(stat = "identity", color = "grey1", size = 0.5) +
labs(title = "Module Size in Healthy Patients",
x = "Modules",
y = "Membership") +
scale_fill_manual(values=as.character(freq.tab$Modules)) + # Set bar colors
theme_minimal() +
theme(
panel.grid = element_blank(), # Remove grid lines
axis.line = element_line(size = 0.5), # Adjust line width of axes
axis.text.x = element_text(size = 10, color='black', angle = 90), # Adjust font size of axis text
axis.text.y = element_text(size = 10, color='black'),
axis.title = element_text(size = 16) # Adjust font size of axis labels
) +
theme(legend.position = "none") # Remove legend if unnecessary
# Function to extract genes belonging to each module
genelist = names(healthy.ggset)
get_module_genes <- function(genelist, moduleColors) {
unique_colors <- unique(moduleColors) # Get unique module names
module_genes <- lapply(unique_colors, function(color) {
which(moduleColors == color) # Indices of genes in this module
})
names(module_genes) <- unique_colors
# Map indices back to gene names
module_genes <- lapply(module_genes, function(indices) genelist[indices])
return(module_genes)
}
healthy_modules <- get_module_genes(colnames(healthy.dcor), moduleColors)
# Create a dataframe
df <- data.frame(
genes = colnames(healthy.dcor),
modules = moduleColors
)
# Save the dataframe to a CSV file
output_file <- paste0("dcor-method/","modules_healthy_0.9.csv")
write.csv(df, file = output_file, row.names = F)
length(df$genes)
[1] 300
unique(df$modules)
[1] "turquoise" "brown" "blue" "yellow" "green" "pink" "black" "red"
head(df)
diseased.dcor = read.csv("dcor-method/dcor_synthetic_diseased_0.9.csv", header=T)
diseased.dcor = as.matrix(diseased.dcor[,-c(1)])
dim(diseased.dcor)
[1] 300 300
hist(rowSums(as.matrix(diseased.dcor)))
adj = adjacency.fromSimilarity(as.matrix(diseased.dcor), power=1)
hist(rowSums(adj))
TOM.diseased.dcor = TOMsimilarity(adj);
..connectivity..
..matrix multiplication (system BLAS)..
..normalization..
..done.
dissTOM.diseased.dcor = 1-TOM.diseased.dcor
dim(TOM.diseased.dcor)
[1] 300 300
hist(rowSums(TOM.diseased.dcor))
write.table(dissTOM.diseased.dcor, file = "dcor-method/dissTOM_diseased_dcor9_0.9.txt", row.names = FALSE, col.names = FALSE, sep = " ")
# Call the hierarchical clustering function
geneTree2 = hclust(as.dist(dissTOM.diseased.dcor), method = "average");
# Plot the resulting clustering tree (dendrogram)
sizeGrWindow(12,9)
plot(geneTree2, xlab="", sub="",
main = "Gene clustering of Healthy Samples based on TOM-based dissimilarity",
labels = FALSE)
# We like large modules, so we set the minimum module size relatively high:
minModuleSize = 10;
# Module identification using dynamic tree cut:
dynamicMods2 = cutreeDynamic(dendro = geneTree2, distM = dissTOM.diseased.dcor,
deepSplit = 2, pamRespectsDendro = FALSE,
minClusterSize = minModuleSize)
..cutHeight not given, setting it to 0.741 ===> 99% of the (truncated) height range in dendro.
..done.
table(dynamicMods2)
dynamicMods2
1 2 3 4 5 6 7 8 9
63 36 33 30 30 29 27 27 25
# Convert numeric lables into colors
dynamicColors2 = labels2colors(dynamicMods2)
table(dynamicColors2)
dynamicColors2
black blue brown green magenta pink red turquoise yellow
27 36 33 30 25 27 29 63 30
# Plot the dendrogram and colors underneath
sizeGrWindow(8,6)
plotDendroAndColors(geneTree2, dynamicColors2, "Dynamic Tree Cut",
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05,
main = "Gene dendrogram of Diseased Samples")
diseased.ggset = read.table(paste0("dcor-method/","synthetic_diseased_0.9.txt"), header = TRUE, sep="\t")
rownames(diseased.ggset) = diseased.ggset$Genes
diseased.ggset = as.data.frame(t(diseased.ggset[,-1]))
diseased.ggset = diseased.ggset[,colnames(diseased.dcor)]
head(diseased.ggset)
# Calculate eigengenes
MEList2 = moduleEigengenes(as.matrix(diseased.ggset), colors = dynamicColors2)
MEs2 = MEList2$eigengenes
# Calculate dissimilarity of module eigengenes
MEDiss2 = 1-cor(MEs2);
# Cluster module eigengenes
METree2 = hclust(as.dist(MEDiss2), method = "average");
# Plot the result
sizeGrWindow(16, 8)
plot(METree2, main = "Clustering of module eigengenes in Healthy Samples",
xlab = "", sub = "")
# Merging modules
MEDissThres = 0.2
# Plot the cut line into the dendrogram
abline(h=MEDissThres, col = "red")
# Rename to moduleColors
moduleColors2 = dynamicColors2
# Construct numerical labels corresponding to the colors
colorOrder2 = c("grey", standardColors(50));
moduleLabels2 = match(moduleColors2, colorOrder2)-1;
dim(MEs2)
[1] 50 9
freq.tab <- as.data.frame(table(moduleColors2))
colnames(freq.tab) <- c("Modules", "Membership")
freq.tab = freq.tab[order(-freq.tab$Membership), ]
rownames(freq.tab) <- 1:nrow(freq.tab)
freq.tab$Modules = factor(freq.tab$Modules, levels = freq.tab$Modules)
ggplot(freq.tab, aes(x = Modules, y = Membership, fill = Modules)) +
geom_bar(stat = "identity", color = "grey1", size = 0.5) +
labs(title = "Module Size in Healthy Patients",
x = "Modules",
y = "Membership") +
scale_fill_manual(values=as.character(freq.tab$Modules)) + # Set bar colors
theme_minimal() +
theme(
panel.grid = element_blank(), # Remove grid lines
axis.line = element_line(size = 0.5), # Adjust line width of axes
axis.text.x = element_text(size = 10, color='black', angle = 90), # Adjust font size of axis text
axis.text.y = element_text(size = 10, color='black'),
axis.title = element_text(size = 16) # Adjust font size of axis labels
) +
theme(legend.position = "none") # Remove legend if unnecessary
# renv::install("andymckenzie/DGCA")
library(DGCA)
log_module_effect_magnitude = 0.1
dgca_design <- data.frame(
Healthy = as.numeric(!datTraits$Condition),
Diseased = as.numeric(datTraits$Condition))
rownames(dgca_design) <- rownames(datTraits)
synthetic_expression_data = read.csv("synthetic_expression_data_effect_0.1.csv", header=T, row.names = "X")
modules_list = read.csv("wgcna/moduleColors_WGCNA_0.1.csv", header=T)
moduleDC_res <- moduleDC(inputMat = synthetic_expression_data,
design = as.matrix(dgca_design),
compare = c("Healthy","Diseased"), # Column name in dgca_design
genes = modules_list$genes,
labels = modules_list$modules,
nPerms = 50,
number_DC_genes = 10,
dCorAvgMethod = "median")
Calculating MDC for module #1, which is called grey
Calculating permutation number 1.
Calculating permutation number 2.
Calculating permutation number 3.
Calculating permutation number 4.
Calculating permutation number 5.
Calculating permutation number 6.
Calculating permutation number 7.
Calculating permutation number 8.
Calculating permutation number 9.
Calculating permutation number 10.
Calculating permutation number 11.
Calculating permutation number 12.
Calculating permutation number 13.
Calculating permutation number 14.
Calculating permutation number 15.
Calculating permutation number 16.
Calculating permutation number 17.
Calculating permutation number 18.
Calculating permutation number 19.
Calculating permutation number 20.
Calculating permutation number 21.
Calculating permutation number 22.
Calculating permutation number 23.
Calculating permutation number 24.
Calculating permutation number 25.
Calculating permutation number 26.
Calculating permutation number 27.
Calculating permutation number 28.
Calculating permutation number 29.
Calculating permutation number 30.
Calculating permutation number 31.
Calculating permutation number 32.
Calculating permutation number 33.
Calculating permutation number 34.
Calculating permutation number 35.
Calculating permutation number 36.
Calculating permutation number 37.
Calculating permutation number 38.
Calculating permutation number 39.
Calculating permutation number 40.
Calculating permutation number 41.
Calculating permutation number 42.
Calculating permutation number 43.
Calculating permutation number 44.
Calculating permutation number 45.
Calculating permutation number 46.
Calculating permutation number 47.
Calculating permutation number 48.
Calculating permutation number 49.
Calculating permutation number 50.
Classifying the differential correlation calls.
Calculating the differential correlation average.
Calculating the differential correlation average.
Calculating MDC for module #2, which is called green
Calculating permutation number 1.
Calculating permutation number 2.
Calculating permutation number 3.
Calculating permutation number 4.
Calculating permutation number 5.
Calculating permutation number 6.
Calculating permutation number 7.
Calculating permutation number 8.
Calculating permutation number 9.
Calculating permutation number 10.
Calculating permutation number 11.
Calculating permutation number 12.
Calculating permutation number 13.
Calculating permutation number 14.
Calculating permutation number 15.
Calculating permutation number 16.
Calculating permutation number 17.
Calculating permutation number 18.
Calculating permutation number 19.
Calculating permutation number 20.
Calculating permutation number 21.
Calculating permutation number 22.
Calculating permutation number 23.
Calculating permutation number 24.
Calculating permutation number 25.
Calculating permutation number 26.
Calculating permutation number 27.
Calculating permutation number 28.
Calculating permutation number 29.
Calculating permutation number 30.
Calculating permutation number 31.
Calculating permutation number 32.
Calculating permutation number 33.
Calculating permutation number 34.
Calculating permutation number 35.
Calculating permutation number 36.
Calculating permutation number 37.
Calculating permutation number 38.
Calculating permutation number 39.
Calculating permutation number 40.
Calculating permutation number 41.
Calculating permutation number 42.
Calculating permutation number 43.
Calculating permutation number 44.
Calculating permutation number 45.
Calculating permutation number 46.
Calculating permutation number 47.
Calculating permutation number 48.
Calculating permutation number 49.
Calculating permutation number 50.
Classifying the differential correlation calls.
Calculating the differential correlation average.
Calculating the differential correlation average.
Calculating MDC for module #3, which is called turquoise
Calculating permutation number 1.
Calculating permutation number 2.
Calculating permutation number 3.
Calculating permutation number 4.
Calculating permutation number 5.
Calculating permutation number 6.
Calculating permutation number 7.
Calculating permutation number 8.
Calculating permutation number 9.
Calculating permutation number 10.
Calculating permutation number 11.
Calculating permutation number 12.
Calculating permutation number 13.
Calculating permutation number 14.
Calculating permutation number 15.
Calculating permutation number 16.
Calculating permutation number 17.
Calculating permutation number 18.
Calculating permutation number 19.
Calculating permutation number 20.
Calculating permutation number 21.
Calculating permutation number 22.
Calculating permutation number 23.
Calculating permutation number 24.
Calculating permutation number 25.
Calculating permutation number 26.
Calculating permutation number 27.
Calculating permutation number 28.
Calculating permutation number 29.
Calculating permutation number 30.
Calculating permutation number 31.
Calculating permutation number 32.
Calculating permutation number 33.
Calculating permutation number 34.
Calculating permutation number 35.
Calculating permutation number 36.
Calculating permutation number 37.
Calculating permutation number 38.
Calculating permutation number 39.
Calculating permutation number 40.
Calculating permutation number 41.
Calculating permutation number 42.
Calculating permutation number 43.
Calculating permutation number 44.
Calculating permutation number 45.
Calculating permutation number 46.
Calculating permutation number 47.
Calculating permutation number 48.
Calculating permutation number 49.
Calculating permutation number 50.
Classifying the differential correlation calls.
Calculating the differential correlation average.
Calculating the differential correlation average.
Calculating MDC for module #4, which is called black
Calculating permutation number 1.
Calculating permutation number 2.
Calculating permutation number 3.
Calculating permutation number 4.
Calculating permutation number 5.
Calculating permutation number 6.
Calculating permutation number 7.
Calculating permutation number 8.
Calculating permutation number 9.
Calculating permutation number 10.
Calculating permutation number 11.
Calculating permutation number 12.
Calculating permutation number 13.
Calculating permutation number 14.
Calculating permutation number 15.
Calculating permutation number 16.
Calculating permutation number 17.
Calculating permutation number 18.
Calculating permutation number 19.
Calculating permutation number 20.
Calculating permutation number 21.
Calculating permutation number 22.
Calculating permutation number 23.
Calculating permutation number 24.
Calculating permutation number 25.
Calculating permutation number 26.
Calculating permutation number 27.
Calculating permutation number 28.
Calculating permutation number 29.
Calculating permutation number 30.
Calculating permutation number 31.
Calculating permutation number 32.
Calculating permutation number 33.
Calculating permutation number 34.
Calculating permutation number 35.
Calculating permutation number 36.
Calculating permutation number 37.
Calculating permutation number 38.
Calculating permutation number 39.
Calculating permutation number 40.
Calculating permutation number 41.
Calculating permutation number 42.
Calculating permutation number 43.
Calculating permutation number 44.
Calculating permutation number 45.
Calculating permutation number 46.
Calculating permutation number 47.
Calculating permutation number 48.
Calculating permutation number 49.
Calculating permutation number 50.
Classifying the differential correlation calls.
Calculating the differential correlation average.
Calculating the differential correlation average.
Calculating MDC for module #5, which is called brown
Calculating permutation number 1.
Calculating permutation number 2.
Calculating permutation number 3.
Calculating permutation number 4.
Calculating permutation number 5.
Calculating permutation number 6.
Calculating permutation number 7.
Calculating permutation number 8.
Calculating permutation number 9.
Calculating permutation number 10.
Calculating permutation number 11.
Calculating permutation number 12.
Calculating permutation number 13.
Calculating permutation number 14.
Calculating permutation number 15.
Calculating permutation number 16.
Calculating permutation number 17.
Calculating permutation number 18.
Calculating permutation number 19.
Calculating permutation number 20.
Calculating permutation number 21.
Calculating permutation number 22.
Calculating permutation number 23.
Calculating permutation number 24.
Calculating permutation number 25.
Calculating permutation number 26.
Calculating permutation number 27.
Calculating permutation number 28.
Calculating permutation number 29.
Calculating permutation number 30.
Calculating permutation number 31.
Calculating permutation number 32.
Calculating permutation number 33.
Calculating permutation number 34.
Calculating permutation number 35.
Calculating permutation number 36.
Calculating permutation number 37.
Calculating permutation number 38.
Calculating permutation number 39.
Calculating permutation number 40.
Calculating permutation number 41.
Calculating permutation number 42.
Calculating permutation number 43.
Calculating permutation number 44.
Calculating permutation number 45.
Calculating permutation number 46.
Calculating permutation number 47.
Calculating permutation number 48.
Calculating permutation number 49.
Calculating permutation number 50.
Classifying the differential correlation calls.
Calculating the differential correlation average.
Calculating the differential correlation average.
Calculating MDC for module #6, which is called magenta
Calculating permutation number 1.
Calculating permutation number 2.
Calculating permutation number 3.
Calculating permutation number 4.
Calculating permutation number 5.
Calculating permutation number 6.
Calculating permutation number 7.
Calculating permutation number 8.
Calculating permutation number 9.
Calculating permutation number 10.
Calculating permutation number 11.
Calculating permutation number 12.
Calculating permutation number 13.
Calculating permutation number 14.
Calculating permutation number 15.
Calculating permutation number 16.
Calculating permutation number 17.
Calculating permutation number 18.
Calculating permutation number 19.
Calculating permutation number 20.
Calculating permutation number 21.
Calculating permutation number 22.
Calculating permutation number 23.
Calculating permutation number 24.
Calculating permutation number 25.
Calculating permutation number 26.
Calculating permutation number 27.
Calculating permutation number 28.
Calculating permutation number 29.
Calculating permutation number 30.
Calculating permutation number 31.
Calculating permutation number 32.
Calculating permutation number 33.
Calculating permutation number 34.
Calculating permutation number 35.
Calculating permutation number 36.
Calculating permutation number 37.
Calculating permutation number 38.
Calculating permutation number 39.
Calculating permutation number 40.
Calculating permutation number 41.
Calculating permutation number 42.
Calculating permutation number 43.
Calculating permutation number 44.
Calculating permutation number 45.
Calculating permutation number 46.
Calculating permutation number 47.
Calculating permutation number 48.
Calculating permutation number 49.
Calculating permutation number 50.
Classifying the differential correlation calls.
Calculating the differential correlation average.
Calculating the differential correlation average.
Calculating MDC for module #7, which is called purple
Calculating permutation number 1.
Calculating permutation number 2.
Calculating permutation number 3.
Calculating permutation number 4.
Calculating permutation number 5.
Calculating permutation number 6.
Calculating permutation number 7.
Calculating permutation number 8.
Calculating permutation number 9.
Calculating permutation number 10.
Calculating permutation number 11.
Calculating permutation number 12.
Calculating permutation number 13.
Calculating permutation number 14.
Calculating permutation number 15.
Calculating permutation number 16.
Calculating permutation number 17.
Calculating permutation number 18.
Calculating permutation number 19.
Calculating permutation number 20.
Calculating permutation number 21.
Calculating permutation number 22.
Calculating permutation number 23.
Calculating permutation number 24.
Calculating permutation number 25.
Calculating permutation number 26.
Calculating permutation number 27.
Calculating permutation number 28.
Calculating permutation number 29.
Calculating permutation number 30.
Calculating permutation number 31.
Calculating permutation number 32.
Calculating permutation number 33.
Calculating permutation number 34.
Calculating permutation number 35.
Calculating permutation number 36.
Calculating permutation number 37.
Calculating permutation number 38.
Calculating permutation number 39.
Calculating permutation number 40.
Calculating permutation number 41.
Calculating permutation number 42.
Calculating permutation number 43.
Calculating permutation number 44.
Calculating permutation number 45.
Calculating permutation number 46.
Calculating permutation number 47.
Calculating permutation number 48.
Calculating permutation number 49.
Calculating permutation number 50.
Classifying the differential correlation calls.
Calculating the differential correlation average.
Calculating the differential correlation average.
Calculating MDC for module #8, which is called yellow
Calculating permutation number 1.
Calculating permutation number 2.
Calculating permutation number 3.
Calculating permutation number 4.
Calculating permutation number 5.
Calculating permutation number 6.
Calculating permutation number 7.
Calculating permutation number 8.
Calculating permutation number 9.
Calculating permutation number 10.
Calculating permutation number 11.
Calculating permutation number 12.
Calculating permutation number 13.
Calculating permutation number 14.
Calculating permutation number 15.
Calculating permutation number 16.
Calculating permutation number 17.
Calculating permutation number 18.
Calculating permutation number 19.
Calculating permutation number 20.
Calculating permutation number 21.
Calculating permutation number 22.
Calculating permutation number 23.
Calculating permutation number 24.
Calculating permutation number 25.
Calculating permutation number 26.
Calculating permutation number 27.
Calculating permutation number 28.
Calculating permutation number 29.
Calculating permutation number 30.
Calculating permutation number 31.
Calculating permutation number 32.
Calculating permutation number 33.
Calculating permutation number 34.
Calculating permutation number 35.
Calculating permutation number 36.
Calculating permutation number 37.
Calculating permutation number 38.
Calculating permutation number 39.
Calculating permutation number 40.
Calculating permutation number 41.
Calculating permutation number 42.
Calculating permutation number 43.
Calculating permutation number 44.
Calculating permutation number 45.
Calculating permutation number 46.
Calculating permutation number 47.
Calculating permutation number 48.
Calculating permutation number 49.
Calculating permutation number 50.
Classifying the differential correlation calls.
Calculating the differential correlation average.
Calculating the differential correlation average.
Calculating MDC for module #9, which is called red
Calculating permutation number 1.
Calculating permutation number 2.
Calculating permutation number 3.
Calculating permutation number 4.
Calculating permutation number 5.
Calculating permutation number 6.
Calculating permutation number 7.
Calculating permutation number 8.
Calculating permutation number 9.
Calculating permutation number 10.
Calculating permutation number 11.
Calculating permutation number 12.
Calculating permutation number 13.
Calculating permutation number 14.
Calculating permutation number 15.
Calculating permutation number 16.
Calculating permutation number 17.
Calculating permutation number 18.
Calculating permutation number 19.
Calculating permutation number 20.
Calculating permutation number 21.
Calculating permutation number 22.
Calculating permutation number 23.
Calculating permutation number 24.
Calculating permutation number 25.
Calculating permutation number 26.
Calculating permutation number 27.
Calculating permutation number 28.
Calculating permutation number 29.
Calculating permutation number 30.
Calculating permutation number 31.
Calculating permutation number 32.
Calculating permutation number 33.
Calculating permutation number 34.
Calculating permutation number 35.
Calculating permutation number 36.
Calculating permutation number 37.
Calculating permutation number 38.
Calculating permutation number 39.
Calculating permutation number 40.
Calculating permutation number 41.
Calculating permutation number 42.
Calculating permutation number 43.
Calculating permutation number 44.
Calculating permutation number 45.
Calculating permutation number 46.
Calculating permutation number 47.
Calculating permutation number 48.
Calculating permutation number 49.
Calculating permutation number 50.
Classifying the differential correlation calls.
Calculating the differential correlation average.
Calculating the differential correlation average.
Calculating MDC for module #10, which is called blue
Calculating permutation number 1.
Calculating permutation number 2.
Calculating permutation number 3.
Calculating permutation number 4.
Calculating permutation number 5.
Calculating permutation number 6.
Calculating permutation number 7.
Calculating permutation number 8.
Calculating permutation number 9.
Calculating permutation number 10.
Calculating permutation number 11.
Calculating permutation number 12.
Calculating permutation number 13.
Calculating permutation number 14.
Calculating permutation number 15.
Calculating permutation number 16.
Calculating permutation number 17.
Calculating permutation number 18.
Calculating permutation number 19.
Calculating permutation number 20.
Calculating permutation number 21.
Calculating permutation number 22.
Calculating permutation number 23.
Calculating permutation number 24.
Calculating permutation number 25.
Calculating permutation number 26.
Calculating permutation number 27.
Calculating permutation number 28.
Calculating permutation number 29.
Calculating permutation number 30.
Calculating permutation number 31.
Calculating permutation number 32.
Calculating permutation number 33.
Calculating permutation number 34.
Calculating permutation number 35.
Calculating permutation number 36.
Calculating permutation number 37.
Calculating permutation number 38.
Calculating permutation number 39.
Calculating permutation number 40.
Calculating permutation number 41.
Calculating permutation number 42.
Calculating permutation number 43.
Calculating permutation number 44.
Calculating permutation number 45.
Calculating permutation number 46.
Calculating permutation number 47.
Calculating permutation number 48.
Calculating permutation number 49.
Calculating permutation number 50.
Classifying the differential correlation calls.
Calculating the differential correlation average.
Calculating the differential correlation average.
Calculating MDC for module #11, which is called pink
Calculating permutation number 1.
Calculating permutation number 2.
Calculating permutation number 3.
Calculating permutation number 4.
Calculating permutation number 5.
Calculating permutation number 6.
Calculating permutation number 7.
Calculating permutation number 8.
Calculating permutation number 9.
Calculating permutation number 10.
Calculating permutation number 11.
Calculating permutation number 12.
Calculating permutation number 13.
Calculating permutation number 14.
Calculating permutation number 15.
Calculating permutation number 16.
Calculating permutation number 17.
Calculating permutation number 18.
Calculating permutation number 19.
Calculating permutation number 20.
Calculating permutation number 21.
Calculating permutation number 22.
Calculating permutation number 23.
Calculating permutation number 24.
Calculating permutation number 25.
Calculating permutation number 26.
Calculating permutation number 27.
Calculating permutation number 28.
Calculating permutation number 29.
Calculating permutation number 30.
Calculating permutation number 31.
Calculating permutation number 32.
Calculating permutation number 33.
Calculating permutation number 34.
Calculating permutation number 35.
Calculating permutation number 36.
Calculating permutation number 37.
Calculating permutation number 38.
Calculating permutation number 39.
Calculating permutation number 40.
Calculating permutation number 41.
Calculating permutation number 42.
Calculating permutation number 43.
Calculating permutation number 44.
Calculating permutation number 45.
Calculating permutation number 46.
Calculating permutation number 47.
Calculating permutation number 48.
Calculating permutation number 49.
Calculating permutation number 50.
Classifying the differential correlation calls.
Calculating the differential correlation average.
Calculating the differential correlation average.
moduleDC_res
# 1. Extract and prepare data
module_raw_names <- moduleDC_res$Module # e.g., c("blue", "red", "green")
module_meanDCs <- moduleDC_res$MeDC # e.g., c(0.1, -0.2, 0.05)
module_pVals <- moduleDC_res$pVal # e.g., c(0.001, 0.05, 0.1)
# 2. Create the desired module order with "ME" prefix
module_order <- paste0("ME", module_raw_names) # This will be MEblue, MEred, MEgreen
# 3. Create the heatmap matrix and assign meaningful row names
dgca_heatmap_matrix <- as.matrix(module_meanDCs)
# Assign the new module names as row names for the matrix.
# This is crucial for labeledHeatmap to correctly associate labels with rows.
rownames(dgca_heatmap_matrix) <- module_order
colnames(dgca_heatmap_matrix) <- c("Mean Diff. Cor.") # This column name will be for xLabel
# 4. Prepare text matrix for p-values and meanDCs
dgca_textMatrix = paste(signif(dgca_heatmap_matrix, 2), "\n(",
signif(module_pVals, 1), ")", sep = "");
dim(dgca_textMatrix) = dim(dgca_heatmap_matrix) # Ensure dimensions are correctly set
# Plotting
cat("\n--- DGCA Pre-defined Module-level Differential Correlation Heatmap ---\n")
--- DGCA Pre-defined Module-level Differential Correlation Heatmap ---
sizeGrWindow(6, 8) # Adjust window size for plot
# Define output file path and name
output_png_file <- paste0("dcga/DGCA_Module_MeanDC_Heatmap_Effect_", log_module_effect_magnitude, ".png")
# Ensure the 'dcga' directory exists. If not, create it.
if (!dir.exists("dcga")) {
dir.create("dcga")
}
# # Open PNG device to save the plot
# png(output_png_file, width = 600, height = 800, res=100, bg = "white")
#
# # Adjust plot margins (bottom, left, top, right)
# # Increase left margin to ensure enough space for longer module names
# par(mar = c(6, 12, 3, 3))
#
# labeledHeatmap(Matrix = dgca_heatmap_matrix,
# xLabels = "Condition", # Overall label for the x-axis
# yLabels = module_order, # Overall label for the y-axis
# ySymbols = rownames(dgca_heatmap_matrix), # Individual labels for each row (MEblue, MEred, etc.)
# colorLabels = FALSE,
# colors = blueWhiteRed(50), # Color scale (from WGCNA)
# textMatrix = dgca_textMatrix, # Text to display inside heatmap cells
# setStdMargins = FALSE,
# cex.text = 0.7, # Size of text INSIDE the cells (meanDC (p-val))
# cex.lab = 1, # Size of "Condition" and "Module Names" overall labels
# cex.rowLabels = 0.9, # **CRITICAL**: Size of the individual row labels (ySymbols). Adjust as needed.
# zlim = c(-max(abs(dgca_heatmap_matrix)), max(abs(dgca_heatmap_matrix))), # Symmetric color scale
# main = paste("DGCA: Median Within-Module Diff")) # Main plot title
#
# Plotting the heatmap using labeledHeatmap
# Customize text matrix for correlation and p-value display
dev.off()
null device
1
png(output_png_file,width=6,height=5,units="in",res=600)
textMatrix = paste(signif(moduleTraitCor, 2), "\n(",
signif(moduleTraitPvalue, 1), ")", sep = "");
dim(textMatrix) = dim(moduleTraitCor)
par(mar = c(6, 10, 3, 3))
par(mar = c(8, 8, 2, 1)) # Reduce margins (bottom, left, top, right)
labeledHeatmap(Matrix = dgca_heatmap_matrix,
xLabels = "Condition",
yLabels = module_order,
ySymbols = rownames(dgca_heatmap_matrix),
colorLabels = FALSE,
colors = blueWhiteRed(50), # Red for positive, blue for negative correlation
textMatrix = dgca_textMatrix,
setStdMargins = FALSE,
cex.text = 0.5, # Adjust text size
# zlim = c(-1, 1),
main = paste("DGCA")
)
cat("\nModule-Trait Relationship Heatmap generated.\n")
Module-Trait Relationship Heatmap generated.
dev.off()
null device
1
cat(sprintf("Saved %s\n\n", output_png_file))
Saved dcga/DGCA_Module_MeanDC_Heatmap_Effect_0.1.png
log_module_effect_magnitude = 0.9
dgca_design <- data.frame(
Healthy = as.numeric(!datTraits$Condition),
Diseased = as.numeric(datTraits$Condition))
rownames(dgca_design) <- rownames(datTraits)
synthetic_expression_data = read.csv("synthetic_expression_data_effect_0.9.csv", header=T, row.names = "X")
modules_list = read.csv("wgcna/moduleColors_WGCNA_0.9.csv", header=T)
moduleDC_res <- moduleDC(inputMat = synthetic_expression_data,
design = as.matrix(dgca_design),
compare = c("Healthy","Diseased"), # Column name in dgca_design
genes = modules_list$genes,
labels = modules_list$modules,
nPerms = 50,
number_DC_genes = 10,
dCorAvgMethod = "median")
Calculating MDC for module #1, which is called brown
Calculating permutation number 1.
Calculating permutation number 2.
Calculating permutation number 3.
Calculating permutation number 4.
Calculating permutation number 5.
Calculating permutation number 6.
Calculating permutation number 7.
Calculating permutation number 8.
Calculating permutation number 9.
Calculating permutation number 10.
Calculating permutation number 11.
Calculating permutation number 12.
Calculating permutation number 13.
Calculating permutation number 14.
Calculating permutation number 15.
Calculating permutation number 16.
Calculating permutation number 17.
Calculating permutation number 18.
Calculating permutation number 19.
Calculating permutation number 20.
Calculating permutation number 21.
Calculating permutation number 22.
Calculating permutation number 23.
Calculating permutation number 24.
Calculating permutation number 25.
Calculating permutation number 26.
Calculating permutation number 27.
Calculating permutation number 28.
Calculating permutation number 29.
Calculating permutation number 30.
Calculating permutation number 31.
Calculating permutation number 32.
Calculating permutation number 33.
Calculating permutation number 34.
Calculating permutation number 35.
Calculating permutation number 36.
Calculating permutation number 37.
Calculating permutation number 38.
Calculating permutation number 39.
Calculating permutation number 40.
Calculating permutation number 41.
Calculating permutation number 42.
Calculating permutation number 43.
Calculating permutation number 44.
Calculating permutation number 45.
Calculating permutation number 46.
Calculating permutation number 47.
Calculating permutation number 48.
Calculating permutation number 49.
Calculating permutation number 50.
Classifying the differential correlation calls.
Calculating the differential correlation average.
Calculating the differential correlation average.
Calculating MDC for module #2, which is called yellow
Calculating permutation number 1.
Calculating permutation number 2.
Calculating permutation number 3.
Calculating permutation number 4.
Calculating permutation number 5.
Calculating permutation number 6.
Calculating permutation number 7.
Calculating permutation number 8.
Calculating permutation number 9.
Calculating permutation number 10.
Calculating permutation number 11.
Calculating permutation number 12.
Calculating permutation number 13.
Calculating permutation number 14.
Calculating permutation number 15.
Calculating permutation number 16.
Calculating permutation number 17.
Calculating permutation number 18.
Calculating permutation number 19.
Calculating permutation number 20.
Calculating permutation number 21.
Calculating permutation number 22.
Calculating permutation number 23.
Calculating permutation number 24.
Calculating permutation number 25.
Calculating permutation number 26.
Calculating permutation number 27.
Calculating permutation number 28.
Calculating permutation number 29.
Calculating permutation number 30.
Calculating permutation number 31.
Calculating permutation number 32.
Calculating permutation number 33.
Calculating permutation number 34.
Calculating permutation number 35.
Calculating permutation number 36.
Calculating permutation number 37.
Calculating permutation number 38.
Calculating permutation number 39.
Calculating permutation number 40.
Calculating permutation number 41.
Calculating permutation number 42.
Calculating permutation number 43.
Calculating permutation number 44.
Calculating permutation number 45.
Calculating permutation number 46.
Calculating permutation number 47.
Calculating permutation number 48.
Calculating permutation number 49.
Calculating permutation number 50.
Classifying the differential correlation calls.
Calculating the differential correlation average.
Calculating the differential correlation average.
Calculating MDC for module #3, which is called turquoise
Calculating permutation number 1.
Calculating permutation number 2.
Calculating permutation number 3.
Calculating permutation number 4.
Calculating permutation number 5.
Calculating permutation number 6.
Calculating permutation number 7.
Calculating permutation number 8.
Calculating permutation number 9.
Calculating permutation number 10.
Calculating permutation number 11.
Calculating permutation number 12.
Calculating permutation number 13.
Calculating permutation number 14.
Calculating permutation number 15.
Calculating permutation number 16.
Calculating permutation number 17.
Calculating permutation number 18.
Calculating permutation number 19.
Calculating permutation number 20.
Calculating permutation number 21.
Calculating permutation number 22.
Calculating permutation number 23.
Calculating permutation number 24.
Calculating permutation number 25.
Calculating permutation number 26.
Calculating permutation number 27.
Calculating permutation number 28.
Calculating permutation number 29.
Calculating permutation number 30.
Calculating permutation number 31.
Calculating permutation number 32.
Calculating permutation number 33.
Calculating permutation number 34.
Calculating permutation number 35.
Calculating permutation number 36.
Calculating permutation number 37.
Calculating permutation number 38.
Calculating permutation number 39.
Calculating permutation number 40.
Calculating permutation number 41.
Calculating permutation number 42.
Calculating permutation number 43.
Calculating permutation number 44.
Calculating permutation number 45.
Calculating permutation number 46.
Calculating permutation number 47.
Calculating permutation number 48.
Calculating permutation number 49.
Calculating permutation number 50.
Classifying the differential correlation calls.
Calculating the differential correlation average.
Calculating the differential correlation average.
Calculating MDC for module #4, which is called green
Calculating permutation number 1.
Calculating permutation number 2.
Calculating permutation number 3.
Calculating permutation number 4.
Calculating permutation number 5.
Calculating permutation number 6.
Calculating permutation number 7.
Calculating permutation number 8.
Calculating permutation number 9.
Calculating permutation number 10.
Calculating permutation number 11.
Calculating permutation number 12.
Calculating permutation number 13.
Calculating permutation number 14.
Calculating permutation number 15.
Calculating permutation number 16.
Calculating permutation number 17.
Calculating permutation number 18.
Calculating permutation number 19.
Calculating permutation number 20.
Calculating permutation number 21.
Calculating permutation number 22.
Calculating permutation number 23.
Calculating permutation number 24.
Calculating permutation number 25.
Calculating permutation number 26.
Calculating permutation number 27.
Calculating permutation number 28.
Calculating permutation number 29.
Calculating permutation number 30.
Calculating permutation number 31.
Calculating permutation number 32.
Calculating permutation number 33.
Calculating permutation number 34.
Calculating permutation number 35.
Calculating permutation number 36.
Calculating permutation number 37.
Calculating permutation number 38.
Calculating permutation number 39.
Calculating permutation number 40.
Calculating permutation number 41.
Calculating permutation number 42.
Calculating permutation number 43.
Calculating permutation number 44.
Calculating permutation number 45.
Calculating permutation number 46.
Calculating permutation number 47.
Calculating permutation number 48.
Calculating permutation number 49.
Calculating permutation number 50.
Classifying the differential correlation calls.
Calculating the differential correlation average.
Calculating the differential correlation average.
Calculating MDC for module #5, which is called pink
Calculating permutation number 1.
Calculating permutation number 2.
Calculating permutation number 3.
Calculating permutation number 4.
Calculating permutation number 5.
Calculating permutation number 6.
Calculating permutation number 7.
Calculating permutation number 8.
Calculating permutation number 9.
Calculating permutation number 10.
Calculating permutation number 11.
Calculating permutation number 12.
Calculating permutation number 13.
Calculating permutation number 14.
Calculating permutation number 15.
Calculating permutation number 16.
Calculating permutation number 17.
Calculating permutation number 18.
Calculating permutation number 19.
Calculating permutation number 20.
Calculating permutation number 21.
Calculating permutation number 22.
Calculating permutation number 23.
Calculating permutation number 24.
Calculating permutation number 25.
Calculating permutation number 26.
Calculating permutation number 27.
Calculating permutation number 28.
Calculating permutation number 29.
Calculating permutation number 30.
Calculating permutation number 31.
Calculating permutation number 32.
Calculating permutation number 33.
Calculating permutation number 34.
Calculating permutation number 35.
Calculating permutation number 36.
Calculating permutation number 37.
Calculating permutation number 38.
Calculating permutation number 39.
Calculating permutation number 40.
Calculating permutation number 41.
Calculating permutation number 42.
Calculating permutation number 43.
Calculating permutation number 44.
Calculating permutation number 45.
Calculating permutation number 46.
Calculating permutation number 47.
Calculating permutation number 48.
Calculating permutation number 49.
Calculating permutation number 50.
Classifying the differential correlation calls.
Calculating the differential correlation average.
Calculating the differential correlation average.
Calculating MDC for module #6, which is called red
Calculating permutation number 1.
Calculating permutation number 2.
Calculating permutation number 3.
Calculating permutation number 4.
Calculating permutation number 5.
Calculating permutation number 6.
Calculating permutation number 7.
Calculating permutation number 8.
Calculating permutation number 9.
Calculating permutation number 10.
Calculating permutation number 11.
Calculating permutation number 12.
Calculating permutation number 13.
Calculating permutation number 14.
Calculating permutation number 15.
Calculating permutation number 16.
Calculating permutation number 17.
Calculating permutation number 18.
Calculating permutation number 19.
Calculating permutation number 20.
Calculating permutation number 21.
Calculating permutation number 22.
Calculating permutation number 23.
Calculating permutation number 24.
Calculating permutation number 25.
Calculating permutation number 26.
Calculating permutation number 27.
Calculating permutation number 28.
Calculating permutation number 29.
Calculating permutation number 30.
Calculating permutation number 31.
Calculating permutation number 32.
Calculating permutation number 33.
Calculating permutation number 34.
Calculating permutation number 35.
Calculating permutation number 36.
Calculating permutation number 37.
Calculating permutation number 38.
Calculating permutation number 39.
Calculating permutation number 40.
Calculating permutation number 41.
Calculating permutation number 42.
Calculating permutation number 43.
Calculating permutation number 44.
Calculating permutation number 45.
Calculating permutation number 46.
Calculating permutation number 47.
Calculating permutation number 48.
Calculating permutation number 49.
Calculating permutation number 50.
Classifying the differential correlation calls.
Calculating the differential correlation average.
Calculating the differential correlation average.
Calculating MDC for module #7, which is called black
Calculating permutation number 1.
Calculating permutation number 2.
Calculating permutation number 3.
Calculating permutation number 4.
Calculating permutation number 5.
Calculating permutation number 6.
Calculating permutation number 7.
Calculating permutation number 8.
Calculating permutation number 9.
Calculating permutation number 10.
Calculating permutation number 11.
Calculating permutation number 12.
Calculating permutation number 13.
Calculating permutation number 14.
Calculating permutation number 15.
Calculating permutation number 16.
Calculating permutation number 17.
Calculating permutation number 18.
Calculating permutation number 19.
Calculating permutation number 20.
Calculating permutation number 21.
Calculating permutation number 22.
Calculating permutation number 23.
Calculating permutation number 24.
Calculating permutation number 25.
Calculating permutation number 26.
Calculating permutation number 27.
Calculating permutation number 28.
Calculating permutation number 29.
Calculating permutation number 30.
Calculating permutation number 31.
Calculating permutation number 32.
Calculating permutation number 33.
Calculating permutation number 34.
Calculating permutation number 35.
Calculating permutation number 36.
Calculating permutation number 37.
Calculating permutation number 38.
Calculating permutation number 39.
Calculating permutation number 40.
Calculating permutation number 41.
Calculating permutation number 42.
Calculating permutation number 43.
Calculating permutation number 44.
Calculating permutation number 45.
Calculating permutation number 46.
Calculating permutation number 47.
Calculating permutation number 48.
Calculating permutation number 49.
Calculating permutation number 50.
Classifying the differential correlation calls.
Calculating the differential correlation average.
Calculating the differential correlation average.
Calculating MDC for module #8, which is called blue
Calculating permutation number 1.
Calculating permutation number 2.
Calculating permutation number 3.
Calculating permutation number 4.
Calculating permutation number 5.
Calculating permutation number 6.
Calculating permutation number 7.
Calculating permutation number 8.
Calculating permutation number 9.
Calculating permutation number 10.
Calculating permutation number 11.
Calculating permutation number 12.
Calculating permutation number 13.
Calculating permutation number 14.
Calculating permutation number 15.
Calculating permutation number 16.
Calculating permutation number 17.
Calculating permutation number 18.
Calculating permutation number 19.
Calculating permutation number 20.
Calculating permutation number 21.
Calculating permutation number 22.
Calculating permutation number 23.
Calculating permutation number 24.
Calculating permutation number 25.
Calculating permutation number 26.
Calculating permutation number 27.
Calculating permutation number 28.
Calculating permutation number 29.
Calculating permutation number 30.
Calculating permutation number 31.
Calculating permutation number 32.
Calculating permutation number 33.
Calculating permutation number 34.
Calculating permutation number 35.
Calculating permutation number 36.
Calculating permutation number 37.
Calculating permutation number 38.
Calculating permutation number 39.
Calculating permutation number 40.
Calculating permutation number 41.
Calculating permutation number 42.
Calculating permutation number 43.
Calculating permutation number 44.
Calculating permutation number 45.
Calculating permutation number 46.
Calculating permutation number 47.
Calculating permutation number 48.
Calculating permutation number 49.
Calculating permutation number 50.
Classifying the differential correlation calls.
Calculating the differential correlation average.
Calculating the differential correlation average.
Calculating MDC for module #9, which is called magenta
Calculating permutation number 1.
Calculating permutation number 2.
Calculating permutation number 3.
Calculating permutation number 4.
Calculating permutation number 5.
Calculating permutation number 6.
Calculating permutation number 7.
Calculating permutation number 8.
Calculating permutation number 9.
Calculating permutation number 10.
Calculating permutation number 11.
Calculating permutation number 12.
Calculating permutation number 13.
Calculating permutation number 14.
Calculating permutation number 15.
Calculating permutation number 16.
Calculating permutation number 17.
Calculating permutation number 18.
Calculating permutation number 19.
Calculating permutation number 20.
Calculating permutation number 21.
Calculating permutation number 22.
Calculating permutation number 23.
Calculating permutation number 24.
Calculating permutation number 25.
Calculating permutation number 26.
Calculating permutation number 27.
Calculating permutation number 28.
Calculating permutation number 29.
Calculating permutation number 30.
Calculating permutation number 31.
Calculating permutation number 32.
Calculating permutation number 33.
Calculating permutation number 34.
Calculating permutation number 35.
Calculating permutation number 36.
Calculating permutation number 37.
Calculating permutation number 38.
Calculating permutation number 39.
Calculating permutation number 40.
Calculating permutation number 41.
Calculating permutation number 42.
Calculating permutation number 43.
Calculating permutation number 44.
Calculating permutation number 45.
Calculating permutation number 46.
Calculating permutation number 47.
Calculating permutation number 48.
Calculating permutation number 49.
Calculating permutation number 50.
Classifying the differential correlation calls.
Calculating the differential correlation average.
Calculating the differential correlation average.
Calculating MDC for module #10, which is called purple
Calculating permutation number 1.
Calculating permutation number 2.
Calculating permutation number 3.
Calculating permutation number 4.
Calculating permutation number 5.
Calculating permutation number 6.
Calculating permutation number 7.
Calculating permutation number 8.
Calculating permutation number 9.
Calculating permutation number 10.
Calculating permutation number 11.
Calculating permutation number 12.
Calculating permutation number 13.
Calculating permutation number 14.
Calculating permutation number 15.
Calculating permutation number 16.
Calculating permutation number 17.
Calculating permutation number 18.
Calculating permutation number 19.
Calculating permutation number 20.
Calculating permutation number 21.
Calculating permutation number 22.
Calculating permutation number 23.
Calculating permutation number 24.
Calculating permutation number 25.
Calculating permutation number 26.
Calculating permutation number 27.
Calculating permutation number 28.
Calculating permutation number 29.
Calculating permutation number 30.
Calculating permutation number 31.
Calculating permutation number 32.
Calculating permutation number 33.
Calculating permutation number 34.
Calculating permutation number 35.
Calculating permutation number 36.
Calculating permutation number 37.
Calculating permutation number 38.
Calculating permutation number 39.
Calculating permutation number 40.
Calculating permutation number 41.
Calculating permutation number 42.
Calculating permutation number 43.
Calculating permutation number 44.
Calculating permutation number 45.
Calculating permutation number 46.
Calculating permutation number 47.
Calculating permutation number 48.
Calculating permutation number 49.
Calculating permutation number 50.
Classifying the differential correlation calls.
Calculating the differential correlation average.
Calculating the differential correlation average.
Calculating MDC for module #11, which is called greenyellow
Calculating permutation number 1.
Calculating permutation number 2.
Calculating permutation number 3.
Calculating permutation number 4.
Calculating permutation number 5.
Calculating permutation number 6.
Calculating permutation number 7.
Calculating permutation number 8.
Calculating permutation number 9.
Calculating permutation number 10.
Calculating permutation number 11.
Calculating permutation number 12.
Calculating permutation number 13.
Calculating permutation number 14.
Calculating permutation number 15.
Calculating permutation number 16.
Calculating permutation number 17.
Calculating permutation number 18.
Calculating permutation number 19.
Calculating permutation number 20.
Calculating permutation number 21.
Calculating permutation number 22.
Calculating permutation number 23.
Calculating permutation number 24.
Calculating permutation number 25.
Calculating permutation number 26.
Calculating permutation number 27.
Calculating permutation number 28.
Calculating permutation number 29.
Calculating permutation number 30.
Calculating permutation number 31.
Calculating permutation number 32.
Calculating permutation number 33.
Calculating permutation number 34.
Calculating permutation number 35.
Calculating permutation number 36.
Calculating permutation number 37.
Calculating permutation number 38.
Calculating permutation number 39.
Calculating permutation number 40.
Calculating permutation number 41.
Calculating permutation number 42.
Calculating permutation number 43.
Calculating permutation number 44.
Calculating permutation number 45.
Calculating permutation number 46.
Calculating permutation number 47.
Calculating permutation number 48.
Calculating permutation number 49.
Calculating permutation number 50.
Classifying the differential correlation calls.
Calculating the differential correlation average.
Calculating the differential correlation average.
moduleDC_res
# 1. Extract and prepare data
module_raw_names <- moduleDC_res$Module # e.g., c("blue", "red", "green")
module_meanDCs <- moduleDC_res$MeDC # e.g., c(0.1, -0.2, 0.05)
module_pVals <- moduleDC_res$pVal # e.g., c(0.001, 0.05, 0.1)
# 2. Create the desired module order with "ME" prefix
module_order <- paste0("ME", module_raw_names) # This will be MEblue, MEred, MEgreen
# 3. Create the heatmap matrix and assign meaningful row names
dgca_heatmap_matrix <- as.matrix(module_meanDCs)
# Assign the new module names as row names for the matrix.
# This is crucial for labeledHeatmap to correctly associate labels with rows.
rownames(dgca_heatmap_matrix) <- module_order
colnames(dgca_heatmap_matrix) <- c("Mean Diff. Cor.") # This column name will be for xLabel
# 4. Prepare text matrix for p-values and meanDCs
dgca_textMatrix = paste(signif(dgca_heatmap_matrix, 2), "\n(",
signif(module_pVals, 1), ")", sep = "");
dim(dgca_textMatrix) = dim(dgca_heatmap_matrix) # Ensure dimensions are correctly set
# Plotting
cat("\n--- DGCA Pre-defined Module-level Differential Correlation Heatmap ---\n")
--- DGCA Pre-defined Module-level Differential Correlation Heatmap ---
sizeGrWindow(6, 8) # Adjust window size for plot
# Define output file path and name
output_png_file <- paste0("dcga/DGCA_Module_MeanDC_Heatmap_Effect_", log_module_effect_magnitude, ".png")
# Ensure the 'dcga' directory exists. If not, create it.
if (!dir.exists("dcga")) {
dir.create("dcga")
}
dev.off()
null device
1
png(output_png_file,width=6,height=5,units="in",res=600)
textMatrix = paste(signif(moduleTraitCor, 2), "\n(",
signif(moduleTraitPvalue, 1), ")", sep = "");
dim(textMatrix) = dim(moduleTraitCor)
par(mar = c(6, 10, 3, 3))
par(mar = c(8, 8, 2, 1)) # Reduce margins (bottom, left, top, right)
labeledHeatmap(Matrix = dgca_heatmap_matrix,
xLabels = "Condition",
yLabels = module_order,
ySymbols = rownames(dgca_heatmap_matrix),
colorLabels = FALSE,
colors = blueWhiteRed(50), # Red for positive, blue for negative correlation
textMatrix = dgca_textMatrix,
setStdMargins = FALSE,
cex.text = 0.5, # Adjust text size
# zlim = c(-1, 1),
main = paste("DGCA")
)
cat("\nModule-Trait Relationship Heatmap generated.\n")
Module-Trait Relationship Heatmap generated.
dev.off()
null device
1
cat(sprintf("Saved %s\n\n", output_png_file))
Saved dcga/DGCA_Module_MeanDC_Heatmap_Effect_0.9.png
# renv::install("diffcoexp")
# renv::install("GEOquery")
library(diffcoexp)
allowWGCNAThreads()
Allowing multi-threading with up to 8 threads.
exprs.h = read.csv("dcor-method/synthetic_healthy_0.1.txt", sep = "\t", header=T, row.names = "Genes")
exprs.d = read.csv("dcor-method/synthetic_diseased_0.1.txt", sep = "\t", header=T, row.names = "Genes")
allowWGCNAThreads()
Allowing multi-threading with up to 8 threads.
# Diseased - Healthy; high correlation --> diseased
# Step 1
res.low =diffcoexp(exprs.1 = exprs.d, exprs.2 = exprs.h,
r.method = "pearson",q.method = "bonferroni")
Finished running comparecor.
Finished running coexpr.
2484 gene pairs remain after half thresholding.
167 DCLs identified.
5 DCGs identified.
library(WGCNA)
# WGCNA often recommends setting this
options(stringsAsFactors = FALSE)
allowWGCNAThreads() # Use multiple cores if available
Allowing multi-threading with up to 8 threads.
# --- Input Data ---
# Read expression data for two conditions
# Ensure 'row.names = "Genes"' correctly identifies the gene ID column
exprs.1 = read.csv("dcor-method/synthetic_healthy_0.1.txt", sep = "\t", header=T, row.names = "Genes")
exprs.2 = read.csv("dcor-method/synthetic_diseased_0.1.txt", sep = "\t", header=T, row.names = "Genes")
# Ensure gene order is consistent across conditions
commonGenes <- intersect(rownames(exprs.1), rownames(exprs.2))
exprs.1 <- exprs.1[commonGenes, ]
exprs.2 <- exprs.2[commonGenes, ]
# IMPORTANT for WGCNA: Transpose data so samples are rows and genes are columns
datExpr1 <- as.data.frame(t(exprs.1))
datExpr2 <- as.data.frame(t(exprs.2))
# Combine expression data for module eigengene calculation and trait mapping
# The column names (genes) must be identical for cbind to work correctly
datExpr_combined <- t(cbind(exprs.1, exprs.2))
cat("Data dimensions for WGCNA (samples x genes):\n")
Data dimensions for WGCNA (samples x genes):
cat("Condition 1:", dim(datExpr1), "\n")
Condition 1: 50 300
cat("Condition 2:", dim(datExpr2), "\n")
Condition 2: 50 300
cat("Combined data:", dim(datExpr_combined), "\n\n")
Combined data: 100 300
# --- Define Soft-thresholding Power (beta) ---
# As per the paper, beta is a tuning parameter for the adjacency difference matrix.
# Its choice impacts the stringency of differential correlation.
beta = 1 # Example value, adjust as needed based on your data and desired stringency
# ==============================================================================
# DiffCoEx Algorithm Steps
# ==============================================================================
# --- Step 1: Build correlation matrix C[k] within each condition k ---
cat("Step 1: Computing correlation matrices for each condition...\n")
Step 1: Computing correlation matrices for each condition...
corMatrix1 <- WGCNA::cor(datExpr1, method = "spearman", use = "pairwise.complete.obs")
corMatrix2 <- WGCNA::cor(datExpr2, method = "spearman", use = "pairwise.complete.obs")
cat("Correlation matrices computed.\n\n")
Correlation matrices computed.
# --- Step 2: Compute matrix of adjacency difference D ---
# d_ij = |sign(cor_ij^(1))(cor_ij^(1))^2 - sign(cor_ij^(2))(cor_ij^(2))^2|^beta
cat("Step 2: Computing the adjacency difference matrix D...\n")
Step 2: Computing the adjacency difference matrix D...
# Get all unique genes (should be commonGenes from above)
allGenes <- colnames(datExpr1)
# Initialize D matrix with zeros
D_matrix <- matrix(0,
nrow = length(allGenes),
ncol = length(allGenes),
dimnames = list(allGenes, allGenes))
# Loop through all gene pairs to calculate D_ij
for (i in 1:length(allGenes)) {
for (j in i:length(allGenes)) { # Loop j from i for upper triangle (and symmetry)
gene_i <- allGenes[i]
gene_j <- allGenes[j]
# Get correlations for the pair
cor_ij_1 <- corMatrix1[gene_i, gene_j]
cor_ij_2 <- corMatrix2[gene_i, gene_j]
# Handle potential NA correlations
# If a correlation is NA in either condition, set differential to 0 for that pair
if (is.na(cor_ij_1) || is.na(cor_ij_2)) {
D_matrix[gene_i, gene_j] <- 0
D_matrix[gene_j, gene_i] <- 0
next
}
# Calculate signed squared correlations
s_ij_1 <- sign(cor_ij_1) * (cor_ij_1^2)
s_ij_2 <- sign(cor_ij_2) * (cor_ij_2^2)
# Calculate d_ij as per paper's formula: |s_ij_1 - s_ij_2|^beta
d_ij_val <- abs(s_ij_1 - s_ij_2)^beta
# Populate D_matrix symmetrically
D_matrix[gene_i, gene_j] <- d_ij_val
D_matrix[gene_j, gene_i] <- d_ij_val # Ensure symmetry
}
}
cat("Adjacency difference matrix D computed. Its range is [", min(D_matrix), ", ", max(D_matrix), "]\n\n")
Adjacency difference matrix D computed. Its range is [ 0 , 0.5624463 ]
# --- Step 3: Derive the Topological Overlap (TO) based dissimilarity matrix T from D ---
cat("Step 3: Computing Topological Overlap Dissimilarity (TOM) from D...\n")
Step 3: Computing Topological Overlap Dissimilarity (TOM) from D...
# It's good practice to ensure the diagonal is 1 for adjacency matrices before TOM
diag(D_matrix) <- 1 # Gene is perfectly "differentially coexpressed" with itself conceptually
# Use WGCNA's TOMsimilarity function on our custom differential adjacency matrix D.
# TOMType="unsigned" is generally appropriate for D_matrix values which are [0,1].
TOM_matrix <- TOMsimilarity(D_matrix, TOMType = "unsigned")
..connectivity..
..matrix multiplication (system BLAS)..
..normalization..
..done.
# Convert TOM to dissimilarity (1 - TOM)
T_dissimilarity <- 1 - TOM_matrix
cat("Topological Overlap Dissimilarity matrix T computed.\n\n")
Topological Overlap Dissimilarity matrix T computed.
# --- Step 4: Clustering and Module Identification ---
cat("Step 4: Performing hierarchical clustering and module identification...\n")
Step 4: Performing hierarchical clustering and module identification...
# Hierarchical clustering
geneTree_diffcoex <- hclust(as.dist(T_dissimilarity), method = "average")
# Plot the dendrogram
plot(geneTree_diffcoex, xlab = "", sub = "", main = "Gene dendrogram based on differential TOM",
labels = FALSE, hang = 0.04)
# Module identification using dynamicTreeCut
minModuleSize_diffcoex <- 10 # Adjust based on your data and desired module size
dynamicMods_diffcoex <- cutreeDynamic(dendro = geneTree_diffcoex,
distM = T_dissimilarity,
deepSplit = 2, # Adjust deepSplit for granularity (0-4)
pamRespectsDendro = FALSE, # Set to TRUE for PAM refinement
minClusterSize = minModuleSize_diffcoex)
..cutHeight not given, setting it to 0.956 ===> 99% of the (truncated) height range in dendro.
..done.
# Assign module colors (for visualization and further analysis)
moduleColors_diffcoex <- labels2colors(dynamicMods_diffcoex)
cat("Module identification complete.\n")
Module identification complete.
cat("Number of differential co-expression modules found:", length(unique(moduleColors_diffcoex)) - 1,
" (excluding grey for unassigned genes)\n") # -1 for 'grey' module
Number of differential co-expression modules found: 9 (excluding grey for unassigned genes)
cat("Sizes of modules:\n")
Sizes of modules:
print(table(moduleColors_diffcoex))
moduleColors_diffcoex
black blue brown green magenta pink purple red turquoise yellow
30 31 31 30 29 29 28 30 32 30
# Output the module assignments to a data frame
differential_modules <- data.frame(
Gene = colnames(datExpr1),
Module = moduleColors_diffcoex
)
# Save the dataframe to a CSV file
output_file <- "diffcoexp/moduleColors_synthetic_0.1.csv"
write.csv(differential_modules, file = output_file, row.names = F)
cat(paste0("\nModule assignments saved to ", output_file, "\n"))
Module assignments saved to diffcoexp/moduleColors_synthetic_0.1.csv
# --- New Section: Plotting Module-Trait Relationships ---
cat("\n--- Plotting Module-Trait Relationships ---\n")
--- Plotting Module-Trait Relationships ---
# Create a trait data frame for your samples
# The row names of the trait data frame must match the sample names in datExpr_combined
trait_data <- data.frame(
Sample = c(rownames(datExpr1), rownames(datExpr2)),
Condition = c(rep("Healthy", nrow(datExpr1)), rep("Diseased", nrow(datExpr2)))
)
rownames(trait_data) <- trait_data$Sample # Set sample names as row names
# Convert categorical trait to numeric for correlation (WGCNA expects numeric)
# 0 for Healthy, 1 for Diseased (arbitrary assignment)
trait_numeric <- as.data.frame(as.numeric(trait_data$Condition == "Diseased"))
rownames(trait_numeric) <- rownames(trait_data)
colnames(trait_numeric) <- "Diseased_vs_Healthy"
cat("Trait data prepared.\n")
Trait data prepared.
print(head(trait_numeric))
cat("\n")
# Calculate Module Eigengenes (MEs)
# MEs represent the "average" expression profile of each module.
# Use the combined expression data and the identified module colors.
MEs_diffcoex <- moduleEigengenes(datExpr_combined, moduleColors_diffcoex)$eigengenes
cat("Module Eigengenes calculated.\n")
Module Eigengenes calculated.
print(head(MEs_diffcoex))
cat("\n")
# Relate MEs to the trait (Condition)
# Calculate correlations and their p-values
moduleTraitCor <- WGCNA::cor(MEs_diffcoex, trait_numeric, use = "pairwise.complete.obs", method = "pearson") # Pearson is common for MEs
moduleTraitPvalue <- WGCNA::corPvalueStudent(moduleTraitCor, nrow(datExpr_combined)) # Number of samples
cat("Module-trait correlations and p-values calculated.\n\n")
Module-trait correlations and p-values calculated.
# Plotting the heatmap using labeledHeatmap
# Customize text matrix for correlation and p-value display
dev.off()
null device
1
png("diffcoexp/DiffCoExp_Synthetic_0.1.png",width=6,height=5,units="in",res=600)
textMatrix = paste(signif(moduleTraitCor, 2), "\n(",
signif(moduleTraitPvalue, 1), ")", sep = "");
dim(textMatrix) = dim(moduleTraitCor)
par(mar = c(6, 10, 3, 3))
par(mar = c(8, 8, 2, 1)) # Reduce margins (bottom, left, top, right)
labeledHeatmap(Matrix = moduleTraitCor,
xLabels = "Condition",
yLabels = names(MEs_diffcoex),
ySymbols = names(MEs_diffcoex),
colorLabels = FALSE,
colors = blueWhiteRed(50), # Red for positive, blue for negative correlation
textMatrix = textMatrix,
setStdMargins = FALSE,
cex.text = 0.5, # Adjust text size
zlim = c(-1, 1),
main = paste("DiffCoEx Modules")
)
cat("\nModule-Trait Relationship Heatmap generated.\n")
Module-Trait Relationship Heatmap generated.
dev.off()
null device
1
library(WGCNA)
# WGCNA often recommends setting this
options(stringsAsFactors = FALSE)
allowWGCNAThreads() # Use multiple cores if available
Allowing multi-threading with up to 8 threads.
# --- Input Data ---
# Read expression data for two conditions
# Ensure 'row.names = "Genes"' correctly identifies the gene ID column
exprs.1 = read.csv("dcor-method/synthetic_healthy_0.9.txt", sep = "\t", header=T, row.names = "Genes")
exprs.2 = read.csv("dcor-method/synthetic_diseased_0.9.txt", sep = "\t", header=T, row.names = "Genes")
# Ensure gene order is consistent across conditions
commonGenes <- intersect(rownames(exprs.1), rownames(exprs.2))
exprs.1 <- exprs.1[commonGenes, ]
exprs.2 <- exprs.2[commonGenes, ]
# IMPORTANT for WGCNA: Transpose data so samples are rows and genes are columns
datExpr1 <- as.data.frame(t(exprs.1))
datExpr2 <- as.data.frame(t(exprs.2))
# Combine expression data for module eigengene calculation and trait mapping
# The column names (genes) must be identical for cbind to work correctly
datExpr_combined <- t(cbind(exprs.1, exprs.2))
cat("Data dimensions for WGCNA (samples x genes):\n")
Data dimensions for WGCNA (samples x genes):
cat("Condition 1:", dim(datExpr1), "\n")
Condition 1: 50 300
cat("Condition 2:", dim(datExpr2), "\n")
Condition 2: 50 300
cat("Combined data:", dim(datExpr_combined), "\n\n")
Combined data: 100 300
# --- Define Soft-thresholding Power (beta) ---
# As per the paper, beta is a tuning parameter for the adjacency difference matrix.
# Its choice impacts the stringency of differential correlation.
beta = 1 # Example value, adjust as needed based on your data and desired stringency
# ==============================================================================
# DiffCoEx Algorithm Steps
# ==============================================================================
# --- Step 1: Build correlation matrix C[k] within each condition k ---
cat("Step 1: Computing correlation matrices for each condition...\n")
Step 1: Computing correlation matrices for each condition...
corMatrix1 <- WGCNA::cor(datExpr1, method = "spearman", use = "pairwise.complete.obs")
corMatrix2 <- WGCNA::cor(datExpr2, method = "spearman", use = "pairwise.complete.obs")
cat("Correlation matrices computed.\n\n")
Correlation matrices computed.
# --- Step 2: Compute matrix of adjacency difference D ---
# d_ij = |sign(cor_ij^(1))(cor_ij^(1))^2 - sign(cor_ij^(2))(cor_ij^(2))^2|^beta
cat("Step 2: Computing the adjacency difference matrix D...\n")
Step 2: Computing the adjacency difference matrix D...
# Get all unique genes (should be commonGenes from above)
allGenes <- colnames(datExpr1)
# Initialize D matrix with zeros
D_matrix <- matrix(0,
nrow = length(allGenes),
ncol = length(allGenes),
dimnames = list(allGenes, allGenes))
# Loop through all gene pairs to calculate D_ij
for (i in 1:length(allGenes)) {
for (j in i:length(allGenes)) { # Loop j from i for upper triangle (and symmetry)
gene_i <- allGenes[i]
gene_j <- allGenes[j]
# Get correlations for the pair
cor_ij_1 <- corMatrix1[gene_i, gene_j]
cor_ij_2 <- corMatrix2[gene_i, gene_j]
# Handle potential NA correlations
# If a correlation is NA in either condition, set differential to 0 for that pair
if (is.na(cor_ij_1) || is.na(cor_ij_2)) {
D_matrix[gene_i, gene_j] <- 0
D_matrix[gene_j, gene_i] <- 0
next
}
# Calculate signed squared correlations
s_ij_1 <- sign(cor_ij_1) * (cor_ij_1^2)
s_ij_2 <- sign(cor_ij_2) * (cor_ij_2^2)
# Calculate d_ij as per paper's formula: |s_ij_1 - s_ij_2|^beta
d_ij_val <- abs(s_ij_1 - s_ij_2)^beta
# Populate D_matrix symmetrically
D_matrix[gene_i, gene_j] <- d_ij_val
D_matrix[gene_j, gene_i] <- d_ij_val # Ensure symmetry
}
}
cat("Adjacency difference matrix D computed. Its range is [", min(D_matrix), ", ", max(D_matrix), "]\n\n")
Adjacency difference matrix D computed. Its range is [ 0 , 0.5001252 ]
# --- Step 3: Derive the Topological Overlap (TO) based dissimilarity matrix T from D ---
cat("Step 3: Computing Topological Overlap Dissimilarity (TOM) from D...\n")
Step 3: Computing Topological Overlap Dissimilarity (TOM) from D...
# It's good practice to ensure the diagonal is 1 for adjacency matrices before TOM
diag(D_matrix) <- 1 # Gene is perfectly "differentially coexpressed" with itself conceptually
# Use WGCNA's TOMsimilarity function on our custom differential adjacency matrix D.
# TOMType="unsigned" is generally appropriate for D_matrix values which are [0,1].
TOM_matrix <- TOMsimilarity(D_matrix, TOMType = "unsigned")
..connectivity..
..matrix multiplication (system BLAS)..
..normalization..
..done.
# Convert TOM to dissimilarity (1 - TOM)
T_dissimilarity <- 1 - TOM_matrix
cat("Topological Overlap Dissimilarity matrix T computed.\n\n")
Topological Overlap Dissimilarity matrix T computed.
# --- Step 4: Clustering and Module Identification ---
cat("Step 4: Performing hierarchical clustering and module identification...\n")
Step 4: Performing hierarchical clustering and module identification...
# Hierarchical clustering
geneTree_diffcoex <- hclust(as.dist(T_dissimilarity), method = "average")
# Plot the dendrogram
plot(geneTree_diffcoex, xlab = "", sub = "", main = "Gene dendrogram based on differential TOM",
labels = FALSE, hang = 0.04)
# Module identification using dynamicTreeCut
minModuleSize_diffcoex <- 10 # Adjust based on your data and desired module size
dynamicMods_diffcoex <- cutreeDynamic(dendro = geneTree_diffcoex,
distM = T_dissimilarity,
deepSplit = 2, # Adjust deepSplit for granularity (0-4)
pamRespectsDendro = FALSE, # Set to TRUE for PAM refinement
minClusterSize = minModuleSize_diffcoex)
..cutHeight not given, setting it to 0.955 ===> 99% of the (truncated) height range in dendro.
..done.
# Assign module colors (for visualization and further analysis)
moduleColors_diffcoex <- labels2colors(dynamicMods_diffcoex)
cat("Module identification complete.\n")
Module identification complete.
cat("Number of differential co-expression modules found:", length(unique(moduleColors_diffcoex)) - 1,
" (excluding grey for unassigned genes)\n") # -1 for 'grey' module
Number of differential co-expression modules found: 10 (excluding grey for unassigned genes)
cat("Sizes of modules:\n")
Sizes of modules:
print(table(moduleColors_diffcoex))
moduleColors_diffcoex
black blue brown green greenyellow magenta pink purple red
30 32 31 31 11 27 28 16 31
turquoise yellow
32 31
# Output the module assignments to a data frame
differential_modules <- data.frame(
Gene = colnames(datExpr1),
Module = moduleColors_diffcoex
)
# Save the dataframe to a CSV file
output_file <- "diffcoexp/moduleColors_synthetic_0.9.csv"
write.csv(differential_modules, file = output_file, row.names = F)
cat(paste0("\nModule assignments saved to ", output_file, "\n"))
Module assignments saved to diffcoexp/moduleColors_synthetic_0.9.csv
# --- New Section: Plotting Module-Trait Relationships ---
cat("\n--- Plotting Module-Trait Relationships ---\n")
--- Plotting Module-Trait Relationships ---
# Create a trait data frame for your samples
# The row names of the trait data frame must match the sample names in datExpr_combined
trait_data <- data.frame(
Sample = c(rownames(datExpr1), rownames(datExpr2)),
Condition = c(rep("Healthy", nrow(datExpr1)), rep("Diseased", nrow(datExpr2)))
)
rownames(trait_data) <- trait_data$Sample # Set sample names as row names
# Convert categorical trait to numeric for correlation (WGCNA expects numeric)
# 0 for Healthy, 1 for Diseased (arbitrary assignment)
trait_numeric <- as.data.frame(as.numeric(trait_data$Condition == "Diseased"))
rownames(trait_numeric) <- rownames(trait_data)
colnames(trait_numeric) <- "Diseased_vs_Healthy"
cat("Trait data prepared.\n")
Trait data prepared.
print(head(trait_numeric))
cat("\n")
# Calculate Module Eigengenes (MEs)
# MEs represent the "average" expression profile of each module.
# Use the combined expression data and the identified module colors.
MEs_diffcoex <- moduleEigengenes(datExpr_combined, moduleColors_diffcoex)$eigengenes
cat("Module Eigengenes calculated.\n")
Module Eigengenes calculated.
print(head(MEs_diffcoex))
cat("\n")
# Relate MEs to the trait (Condition)
# Calculate correlations and their p-values
moduleTraitCor <- WGCNA::cor(MEs_diffcoex, trait_numeric, use = "pairwise.complete.obs", method = "pearson") # Pearson is common for MEs
moduleTraitPvalue <- WGCNA::corPvalueStudent(moduleTraitCor, nrow(datExpr_combined)) # Number of samples
cat("Module-trait correlations and p-values calculated.\n\n")
Module-trait correlations and p-values calculated.
# Plotting the heatmap using labeledHeatmap
# Customize text matrix for correlation and p-value display
dev.off()
null device
1
png("diffcoexp/DiffCoExp_Synthetic_0.9.png",width=6,height=5,units="in",res=600)
textMatrix = paste(signif(moduleTraitCor, 2), "\n(",
signif(moduleTraitPvalue, 1), ")", sep = "");
dim(textMatrix) = dim(moduleTraitCor)
par(mar = c(6, 10, 3, 3))
par(mar = c(8, 8, 2, 1)) # Reduce margins (bottom, left, top, right)
labeledHeatmap(Matrix = moduleTraitCor,
xLabels = "Condition",
yLabels = names(MEs_diffcoex),
ySymbols = names(MEs_diffcoex),
colorLabels = FALSE,
colors = blueWhiteRed(50), # Red for positive, blue for negative correlation
textMatrix = textMatrix,
setStdMargins = FALSE,
cex.text = 0.5, # Adjust text size
zlim = c(-1, 1),
main = paste("DiffCoEx Modules")
)
cat("\nModule-Trait Relationship Heatmap generated.\n")
Module-Trait Relationship Heatmap generated.
dev.off()
null device
1
exprs.h = read.csv("dcor-method/synthetic_healthy_0.9.txt", sep = "\t", header=T, row.names = "Genes")
exprs.d = read.csv("dcor-method/synthetic_diseased_0.9.txt", sep = "\t", header=T, row.names = "Genes")
allowWGCNAThreads()
Allowing multi-threading with up to 8 threads.
# Diseased - Healthy; high correlation --> diseased
res.high =diffcoexp(exprs.1 = exprs.d, exprs.2 = exprs.h,
r.method = "pearson",q.method = "bonferroni")
Finished running comparecor.
Finished running coexpr.
3039 gene pairs remain after half thresholding.
711 DCLs identified.
12 DCGs identified.
evaluate_method <- function(true_positive_genes, predicted_positive_modules, gene_module_df) {
gene_module_df <- gene_module_df[!is.na(gene_module_df$gene) & !is.na(gene_module_df$module), ]
all_genes <- unique(gene_module_df$gene)
# Handle empty predicted modules
if (length(predicted_positive_modules) == 0) {
predicted_positive_genes <- character(0)
} else {
predicted_positive_genes <- gene_module_df$gene[gene_module_df$module %in% predicted_positive_modules]
}
TP <- sum(predicted_positive_genes %in% true_positive_genes)
FP <- sum(!(predicted_positive_genes %in% true_positive_genes))
FN <- sum(true_positive_genes %in% all_genes & !(true_positive_genes %in% predicted_positive_genes))
TN <- sum(!(all_genes %in% true_positive_genes) & !(all_genes %in% predicted_positive_genes))
precision <- ifelse((TP + FP) > 0, TP / (TP + FP), NA)
recall <- ifelse((TP + FN) > 0, TP / (TP + FN), NA)
return(data.frame(
TP = TP, FP = FP, TN = TN, FN = FN,
precision = precision,
recall = recall
))
}
# Inputs
synthetic_data = read.csv("synthetic_expression_data_effect_0.1.csv", header=T)
true_pos_diseased_genes <- synthetic_data$X[151:300]
# Module mapping
gene_module_files <- list(
WGCNA = "wgcna/moduleColors_WGCNA_0.1.csv",
DiffCoExp = "diffcoexp/moduleColors_synthetic_0.1.csv",
DGCA = "wgcna/moduleColors_WGCNA_0.1.csv",
Our_Method = list("dcor-method/modules_diseased_0.1.csv",
"dcor-method/modules_healthy_0.1.csv")
)
# Positively correlated modules
predicted_modules_by_method = list(
WGCNA = character(0),
DiffCoExp = character(0),
DGCA= c("brown"),
Our_Method = list(c("turquoise", "red", "magenta", "brown", "yellow", "purple"),
c("yellow", "blue", "red", "brown")
)
)
# Evaluation loop
results_list <- list()
for (method in names(gene_module_files)) {
files <- gene_module_files[[method]]
modules <- predicted_modules_by_method[[method]]
# Combine all dataframes for this method
combined_df <- data.frame()
for (i in seq_along(files)) {
df <- read.csv(files[[i]], stringsAsFactors = FALSE)
colnames(df) <- c("gene", "module")
# Append with file ID if needed (optional)
combined_df <- rbind(combined_df, df)
}
# Remove duplicates (in case same gene appears multiple times)
combined_df <- unique(combined_df)
# Determine combined predicted modules
if (is.character(modules)) {
predicted_modules <- modules
} else if (is.list(modules)) {
predicted_modules <- unique(unlist(modules))
} else {
stop(paste("Invalid predicted modules format for method", method))
}
# Evaluate once for the combined gene-module mapping
res <- evaluate_method(
true_positive_genes = true_pos_diseased_genes,
predicted_positive_modules = predicted_modules,
gene_module_df = combined_df
)
results_list[[method]] <- data.frame(
method = method,
TP = res$TP,
FP = res$FP,
TN = res$TN,
FN = res$FN,
precision = res$precision*100,
recall = res$recall*100,
row.names = NULL
)
}
# Combine all results into one summary data frame
summary_df <- do.call(rbind, results_list)
rownames(summary_df) <- summary_df$method
summary_df$method <- NULL
# View results
print(summary_df)
# Inputs
synthetic_data = read.csv("synthetic_expression_data_effect_0.9.csv", header=T)
true_pos_diseased_genes <- synthetic_data$X[151:300]
# Module mapping
gene_module_files <- list(
WGCNA = "wgcna/moduleColors_WGCNA_0.9.csv",
DiffCoExp = "diffcoexp/moduleColors_synthetic_0.9.csv",
DGCA = "wgcna/moduleColors_WGCNA_0.9.csv",
Our_Method = list("dcor-method/modules_diseased_0.9.csv",
"dcor-method/modules_healthy_0.9.csv")
)
# Positively correlated modules
predicted_modules_by_method = list(
WGCNA = c("blue", "black", "red", "magenta", "greenyellow", "purple"),
DiffCoExp = c("blue", "green", "greenyellow", "pink", "purple", "yellow"),
DGCA = c("turquoise","greenyellow","magenta","black","pink"),
Our_Method = list(c("blue", "green", "pink", "brown"),
c("pink", "blue", "yellow", "black")
)
)
# Evaluation loop
results_list <- list()
for (method in names(gene_module_files)) {
files <- gene_module_files[[method]]
modules <- predicted_modules_by_method[[method]]
# Combine all dataframes for this method
combined_df <- data.frame()
for (i in seq_along(files)) {
df <- read.csv(files[[i]], stringsAsFactors = FALSE)
colnames(df) <- c("gene", "module")
# Append with file ID if needed (optional)
combined_df <- rbind(combined_df, df)
}
# Remove duplicates (in case same gene appears multiple times)
combined_df <- unique(combined_df)
# Determine combined predicted modules
if (is.character(modules)) {
predicted_modules <- modules
} else if (is.list(modules)) {
predicted_modules <- unique(unlist(modules))
} else {
stop(paste("Invalid predicted modules format for method", method))
}
# Evaluate once for the combined gene-module mapping
res <- evaluate_method(
true_positive_genes = true_pos_diseased_genes,
predicted_positive_modules = predicted_modules,
gene_module_df = combined_df
)
results_list[[method]] <- data.frame(
method = method,
TP = res$TP,
FP = res$FP,
TN = res$TN,
FN = res$FN,
precision = res$precision*100,
recall = res$recall*100,
row.names = NULL
)
}
# Combine all results into one summary data frame
summary_df <- do.call(rbind, results_list)
rownames(summary_df) <- summary_df$method
summary_df$method <- NULL
# View results
print(summary_df)